4.9-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 19 Nov 2018 11:06:08 +0000 (12:06 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 19 Nov 2018 11:06:08 +0000 (12:06 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 19 Nov 2018 11:06:08 +0000 (12:06 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 19 Nov 2018 11:06:08 +0000 (12:06 +0100)
diff --git a/queue-4.9/clk-at91-fix-division-by-zero-in-pll-recalc_rate.patch b/queue-4.9/clk-at91-fix-division-by-zero-in-pll-recalc_rate.patch

new file mode 100644 (file)

index 0000000..4aa88a7
--- /dev/null
+++ b/queue-4.9/clk-at91-fix-division-by-zero-in-pll-recalc_rate.patch
@@ -0,0 +1,37 @@
+From 0f5cb0e6225cae2f029944cb8c74617aab6ddd49 Mon Sep 17 00:00:00 2001
+From: Ronald Wahl <rwahl@gmx.de>
+Date: Wed, 10 Oct 2018 15:54:54 +0200
+Subject: clk: at91: Fix division by zero in PLL recalc_rate()
+
+From: Ronald Wahl <rwahl@gmx.de>
+
+commit 0f5cb0e6225cae2f029944cb8c74617aab6ddd49 upstream.
+
+Commit a982e45dc150 ("clk: at91: PLL recalc_rate() now using cached MUL
+and DIV values") removed a check that prevents a division by zero. This
+now causes a stacktrace when booting the kernel on a at91 platform if
+the PLL DIV register contains zero. This commit reintroduces this check.
+
+Fixes: a982e45dc150 ("clk: at91: PLL recalc_rate() now using cached...")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Ronald Wahl <rwahl@gmx.de>
+Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
+Signed-off-by: Stephen Boyd <sboyd@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/clk/at91/clk-pll.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/clk/at91/clk-pll.c
++++ b/drivers/clk/at91/clk-pll.c
+@@ -133,6 +133,9 @@ static unsigned long clk_pll_recalc_rate
+ {
+       struct clk_pll *pll = to_clk_pll(hw);
+ 
++      if (!pll->div || !pll->mul)
++              return 0;
++
+       return (parent_rate / pll->div) * (pll->mul + 1);
+ }
+ 
diff --git a/queue-4.9/clk-rockchip-fix-static-checker-warning-in-rockchip_ddrclk_get_parent-call.patch b/queue-4.9/clk-rockchip-fix-static-checker-warning-in-rockchip_ddrclk_get_parent-call.patch

new file mode 100644 (file)

index 0000000..d6a28fc
--- /dev/null
+++ b/queue-4.9/clk-rockchip-fix-static-checker-warning-in-rockchip_ddrclk_get_parent-call.patch
@@ -0,0 +1,45 @@
+From 665636b2940d0897c4130253467f5e8c42eea392 Mon Sep 17 00:00:00 2001
+From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
+Date: Tue, 16 Oct 2018 15:41:44 +0200
+Subject: clk: rockchip: Fix static checker warning in rockchip_ddrclk_get_parent call
+
+From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
+
+commit 665636b2940d0897c4130253467f5e8c42eea392 upstream.
+
+Fixes the signedness bug returning '(-22)' on the return type by removing the
+sanity checker in rockchip_ddrclk_get_parent(). The function should return
+and unsigned value only and it's safe to remove the sanity checker as the
+core functions that call get_parent like clk_core_get_parent_by_index already
+ensures the validity of the clk index returned (index >= core->num_parents).
+
+Fixes: a4f182bf81f18 ("clk: rockchip: add new clock-type for the ddrclk")
+Cc: stable@vger.kernel.org
+Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
+Reviewed-by: Stephen Boyd <sboyd@kernel.org>
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/clk/rockchip/clk-ddr.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/drivers/clk/rockchip/clk-ddr.c
++++ b/drivers/clk/rockchip/clk-ddr.c
+@@ -80,16 +80,12 @@ static long rockchip_ddrclk_sip_round_ra
+ static u8 rockchip_ddrclk_get_parent(struct clk_hw *hw)
+ {
+       struct rockchip_ddrclk *ddrclk = to_rockchip_ddrclk_hw(hw);
+-      int num_parents = clk_hw_get_num_parents(hw);
+       u32 val;
+ 
+       val = clk_readl(ddrclk->reg_base +
+                       ddrclk->mux_offset) >> ddrclk->mux_shift;
+       val &= GENMASK(ddrclk->mux_width - 1, 0);
+ 
+-      if (val >= num_parents)
+-              return -EINVAL;
+-
+       return val;
+ }
+ 
diff --git a/queue-4.9/clk-s2mps11-fix-matching-when-built-as-module-and-dt-node-contains-compatible.patch b/queue-4.9/clk-s2mps11-fix-matching-when-built-as-module-and-dt-node-contains-compatible.patch

new file mode 100644 (file)

index 0000000..e66bb66
--- /dev/null
+++ b/queue-4.9/clk-s2mps11-fix-matching-when-built-as-module-and-dt-node-contains-compatible.patch
@@ -0,0 +1,75 @@
+From 8985167ecf57f97061599a155bb9652c84ea4913 Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <krzk@kernel.org>
+Date: Wed, 29 Aug 2018 21:20:10 +0200
+Subject: clk: s2mps11: Fix matching when built as module and DT node contains compatible
+
+From: Krzysztof Kozlowski <krzk@kernel.org>
+
+commit 8985167ecf57f97061599a155bb9652c84ea4913 upstream.
+
+When driver is built as module and DT node contains clocks compatible
+(e.g. "samsung,s2mps11-clk"), the module will not be autoloaded because
+module aliases won't match.
+
+The modalias from uevent: of:NclocksT<NULL>Csamsung,s2mps11-clk
+The modalias from driver: platform:s2mps11-clk
+
+The devices are instantiated by parent's MFD.  However both Device Tree
+bindings and parent define the compatible for clocks devices.  In case
+of module matching this DT compatible will be used.
+
+The issue will not happen if this is a built-in (no need for module
+matching) or when clocks DT node does not contain compatible (not
+correct from bindings perspective but working for driver).
+
+Note when backporting to stable kernels: adjust the list of device ID
+entries.
+
+Cc: <stable@vger.kernel.org>
+Fixes: 53c31b3437a6 ("mfd: sec-core: Add of_compatible strings for clock MFD cells")
+Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
+Acked-by: Stephen Boyd <sboyd@kernel.org>
+Signed-off-by: Stephen Boyd <sboyd@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c
+index d44e0eea31ec..0934d3724495 100644
+--- a/drivers/clk/clk-s2mps11.c
++++ b/drivers/clk/clk-s2mps11.c
+@@ -245,6 +245,36 @@ static const struct platform_device_id s2mps11_clk_id[] = {
+ };
+ MODULE_DEVICE_TABLE(platform, s2mps11_clk_id);
+ 
++#ifdef CONFIG_OF
++/*
++ * Device is instantiated through parent MFD device and device matching is done
++ * through platform_device_id.
++ *
++ * However if device's DT node contains proper clock compatible and driver is
++ * built as a module, then the *module* matching will be done trough DT aliases.
++ * This requires of_device_id table.  In the same time this will not change the
++ * actual *device* matching so do not add .of_match_table.
++ */
++static const struct of_device_id s2mps11_dt_match[] = {
++      {
++              .compatible = "samsung,s2mps11-clk",
++              .data = (void *)S2MPS11X,
++      }, {
++              .compatible = "samsung,s2mps13-clk",
++              .data = (void *)S2MPS13X,
++      }, {
++              .compatible = "samsung,s2mps14-clk",
++              .data = (void *)S2MPS14X,
++      }, {
++              .compatible = "samsung,s5m8767-clk",
++              .data = (void *)S5M8767X,
++      }, {
++              /* Sentinel */
++      },
++};
++MODULE_DEVICE_TABLE(of, s2mps11_dt_match);
++#endif
++
+ static struct platform_driver s2mps11_clk_driver = {
+       .driver = {
+               .name  = "s2mps11-clk",
diff --git a/queue-4.9/libceph-bump-ceph_msg_max_data_len.patch b/queue-4.9/libceph-bump-ceph_msg_max_data_len.patch

new file mode 100644 (file)

index 0000000..581f1dd
--- /dev/null
+++ b/queue-4.9/libceph-bump-ceph_msg_max_data_len.patch
@@ -0,0 +1,42 @@
+From 94e6992bb560be8bffb47f287194adf070b57695 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Wed, 26 Sep 2018 18:03:16 +0200
+Subject: libceph: bump CEPH_MSG_MAX_DATA_LEN
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 94e6992bb560be8bffb47f287194adf070b57695 upstream.
+
+If the read is large enough, we end up spinning in the messenger:
+
+  libceph: osd0 192.168.122.1:6801 io error
+  libceph: osd0 192.168.122.1:6801 io error
+  libceph: osd0 192.168.122.1:6801 io error
+
+This is a receive side limit, so only reads were affected.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/ceph/libceph.h |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/include/linux/ceph/libceph.h
++++ b/include/linux/ceph/libceph.h
+@@ -77,7 +77,13 @@ struct ceph_options {
+ 
+ #define CEPH_MSG_MAX_FRONT_LEN        (16*1024*1024)
+ #define CEPH_MSG_MAX_MIDDLE_LEN       (16*1024*1024)
+-#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024)
++
++/*
++ * Handle the largest possible rbd object in one message.
++ * There is no limit on the size of cephfs objects, but it has to obey
++ * rsize and wsize mount options anyway.
++ */
++#define CEPH_MSG_MAX_DATA_LEN (32*1024*1024)
+ 
+ #define CEPH_AUTH_NAME_DEFAULT   "guest"
+ 
diff --git a/queue-4.9/mach64-fix-display-corruption-on-big-endian-machines.patch b/queue-4.9/mach64-fix-display-corruption-on-big-endian-machines.patch

new file mode 100644 (file)

index 0000000..6cf377b
--- /dev/null
+++ b/queue-4.9/mach64-fix-display-corruption-on-big-endian-machines.patch
@@ -0,0 +1,59 @@
+From 3c6c6a7878d00a3ac997a779c5b9861ff25dfcc8 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Mon, 8 Oct 2018 12:57:34 +0200
+Subject: mach64: fix display corruption on big endian machines
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 3c6c6a7878d00a3ac997a779c5b9861ff25dfcc8 upstream.
+
+The code for manual bit triple is not endian-clean. It builds the variable
+"hostdword" using byte accesses, therefore we must read the variable with
+"le32_to_cpu".
+
+The patch also enables (hardware or software) bit triple only if the image
+is monochrome (image->depth). If we want to blit full-color image, we
+shouldn't use the triple code.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Reviewed-by: Ville Syrjälä <syrjala@sci.fi>
+Cc: stable@vger.kernel.org
+Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/fbdev/aty/mach64_accel.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/video/fbdev/aty/mach64_accel.c
++++ b/drivers/video/fbdev/aty/mach64_accel.c
+@@ -344,7 +344,7 @@ void atyfb_imageblit(struct fb_info *inf
+                * since Rage 3D IIc we have DP_HOST_TRIPLE_EN bit
+                * this hwaccelerated triple has an issue with not aligned data
+                */
+-              if (M64_HAS(HW_TRIPLE) && image->width % 8 == 0)
++              if (image->depth == 1 && M64_HAS(HW_TRIPLE) && image->width % 8 == 0)
+                       pix_width |= DP_HOST_TRIPLE_EN;
+       }
+ 
+@@ -381,7 +381,7 @@ void atyfb_imageblit(struct fb_info *inf
+       src_bytes = (((image->width * image->depth) + 7) / 8) * image->height;
+ 
+       /* manual triple each pixel */
+-      if (info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) {
++      if (image->depth == 1 && info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) {
+               int inbit, outbit, mult24, byte_id_in_dword, width;
+               u8 *pbitmapin = (u8*)image->data, *pbitmapout;
+               u32 hostdword;
+@@ -414,7 +414,7 @@ void atyfb_imageblit(struct fb_info *inf
+                               }
+                       }
+                       wait_for_fifo(1, par);
+-                      aty_st_le32(HOST_DATA0, hostdword, par);
++                      aty_st_le32(HOST_DATA0, le32_to_cpu(hostdword), par);
+               }
+       } else {
+               u32 *pbitmap, dwords = (src_bytes + 3) / 4;
diff --git a/queue-4.9/mach64-fix-image-corruption-due-to-reading-accelerator-registers.patch b/queue-4.9/mach64-fix-image-corruption-due-to-reading-accelerator-registers.patch

new file mode 100644 (file)

index 0000000..96b3927
--- /dev/null
+++ b/queue-4.9/mach64-fix-image-corruption-due-to-reading-accelerator-registers.patch
@@ -0,0 +1,114 @@
+From c09bcc91bb94ed91f1391bffcbe294963d605732 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Mon, 8 Oct 2018 12:57:35 +0200
+Subject: mach64: fix image corruption due to reading accelerator registers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit c09bcc91bb94ed91f1391bffcbe294963d605732 upstream.
+
+Reading the registers without waiting for engine idle returns
+unpredictable values. These unpredictable values result in display
+corruption - if atyfb_imageblit reads the content of DP_PIX_WIDTH with the
+bit DP_HOST_TRIPLE_EN set (from previous invocation), the driver would
+never ever clear the bit, resulting in display corruption.
+
+We don't want to wait for idle because it would degrade performance, so
+this patch modifies the driver so that it never reads accelerator
+registers.
+
+HOST_CNTL doesn't have to be read, we can just write it with
+HOST_BYTE_ALIGN because no other part of the driver cares if
+HOST_BYTE_ALIGN is set.
+
+DP_PIX_WIDTH is written in the functions atyfb_copyarea and atyfb_fillrect
+with the default value and in atyfb_imageblit with the value set according
+to the source image data.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Reviewed-by: Ville Syrjälä <syrjala@sci.fi>
+Cc: stable@vger.kernel.org
+Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/fbdev/aty/mach64_accel.c |   22 +++++++++-------------
+ 1 file changed, 9 insertions(+), 13 deletions(-)
+
+--- a/drivers/video/fbdev/aty/mach64_accel.c
++++ b/drivers/video/fbdev/aty/mach64_accel.c
+@@ -126,7 +126,7 @@ void aty_init_engine(struct atyfb_par *p
+ 
+       /* set host attributes */
+       wait_for_fifo(13, par);
+-      aty_st_le32(HOST_CNTL, 0, par);
++      aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par);
+ 
+       /* set pattern attributes */
+       aty_st_le32(PAT_REG0, 0, par);
+@@ -232,7 +232,8 @@ void atyfb_copyarea(struct fb_info *info
+               rotation = rotation24bpp(dx, direction);
+       }
+ 
+-      wait_for_fifo(4, par);
++      wait_for_fifo(5, par);
++      aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par);
+       aty_st_le32(DP_SRC, FRGD_SRC_BLIT, par);
+       aty_st_le32(SRC_Y_X, (sx << 16) | sy, par);
+       aty_st_le32(SRC_HEIGHT1_WIDTH1, (width << 16) | area->height, par);
+@@ -268,7 +269,8 @@ void atyfb_fillrect(struct fb_info *info
+               rotation = rotation24bpp(dx, DST_X_LEFT_TO_RIGHT);
+       }
+ 
+-      wait_for_fifo(3, par);
++      wait_for_fifo(4, par);
++      aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par);
+       aty_st_le32(DP_FRGD_CLR, color, par);
+       aty_st_le32(DP_SRC,
+                   BKGD_SRC_BKGD_CLR | FRGD_SRC_FRGD_CLR | MONO_SRC_ONE,
+@@ -283,7 +285,7 @@ void atyfb_imageblit(struct fb_info *inf
+ {
+       struct atyfb_par *par = (struct atyfb_par *) info->par;
+       u32 src_bytes, dx = image->dx, dy = image->dy, width = image->width;
+-      u32 pix_width_save, pix_width, host_cntl, rotation = 0, src, mix;
++      u32 pix_width, rotation = 0, src, mix;
+ 
+       if (par->asleep)
+               return;
+@@ -295,8 +297,7 @@ void atyfb_imageblit(struct fb_info *inf
+               return;
+       }
+ 
+-      pix_width = pix_width_save = aty_ld_le32(DP_PIX_WIDTH, par);
+-      host_cntl = aty_ld_le32(HOST_CNTL, par) | HOST_BYTE_ALIGN;
++      pix_width = par->crtc.dp_pix_width;
+ 
+       switch (image->depth) {
+       case 1:
+@@ -369,12 +370,11 @@ void atyfb_imageblit(struct fb_info *inf
+               mix = FRGD_MIX_D_XOR_S | BKGD_MIX_D;
+       }
+ 
+-      wait_for_fifo(6, par);
+-      aty_st_le32(DP_WRITE_MASK, 0xFFFFFFFF, par);
++      wait_for_fifo(5, par);
+       aty_st_le32(DP_PIX_WIDTH, pix_width, par);
+       aty_st_le32(DP_MIX, mix, par);
+       aty_st_le32(DP_SRC, src, par);
+-      aty_st_le32(HOST_CNTL, host_cntl, par);
++      aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par);
+       aty_st_le32(DST_CNTL, DST_Y_TOP_TO_BOTTOM | DST_X_LEFT_TO_RIGHT | rotation, par);
+ 
+       draw_rect(dx, dy, width, image->height, par);
+@@ -423,8 +423,4 @@ void atyfb_imageblit(struct fb_info *inf
+                       aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par);
+               }
+       }
+-
+-      /* restore pix_width */
+-      wait_for_fifo(1, par);
+-      aty_st_le32(DP_PIX_WIDTH, pix_width_save, par);
+ }
diff --git a/queue-4.9/mm-thp-relax-__gfp_thisnode-for-madv_hugepage-mappings.patch b/queue-4.9/mm-thp-relax-__gfp_thisnode-for-madv_hugepage-mappings.patch

new file mode 100644 (file)

index 0000000..f9fdbb9
--- /dev/null
+++ b/queue-4.9/mm-thp-relax-__gfp_thisnode-for-madv_hugepage-mappings.patch
@@ -0,0 +1,228 @@
+From ac5b2c18911ffe95c08d69273917f90212cf5659 Mon Sep 17 00:00:00 2001
+From: Andrea Arcangeli <aarcange@redhat.com>
+Date: Fri, 2 Nov 2018 15:47:59 -0700
+Subject: mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings
+
+From: Andrea Arcangeli <aarcange@redhat.com>
+
+commit ac5b2c18911ffe95c08d69273917f90212cf5659 upstream.
+
+THP allocation might be really disruptive when allocated on NUMA system
+with the local node full or hard to reclaim.  Stefan has posted an
+allocation stall report on 4.12 based SLES kernel which suggests the
+same issue:
+
+  kvm: page allocation stalls for 194572ms, order:9, mode:0x4740ca(__GFP_HIGHMEM|__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE|__GFP_MOVABLE|__GFP_DIRECT_RECLAIM), nodemask=(null)
+  kvm cpuset=/ mems_allowed=0-1
+  CPU: 10 PID: 84752 Comm: kvm Tainted: G        W 4.12.0+98-ph <a href="/view.php?id=1" title="[geschlossen] Integration Ramdisk" class="resolved">0000001</a> SLE15 (unreleased)
+  Hardware name: Supermicro SYS-1029P-WTRT/X11DDW-NT, BIOS 2.0 12/05/2017
+  Call Trace:
+   dump_stack+0x5c/0x84
+   warn_alloc+0xe0/0x180
+   __alloc_pages_slowpath+0x820/0xc90
+   __alloc_pages_nodemask+0x1cc/0x210
+   alloc_pages_vma+0x1e5/0x280
+   do_huge_pmd_wp_page+0x83f/0xf00
+   __handle_mm_fault+0x93d/0x1060
+   handle_mm_fault+0xc6/0x1b0
+   __do_page_fault+0x230/0x430
+   do_page_fault+0x2a/0x70
+   page_fault+0x7b/0x80
+   [...]
+  Mem-Info:
+  active_anon:126315487 inactive_anon:1612476 isolated_anon:5
+   active_file:60183 inactive_file:245285 isolated_file:0
+   unevictable:15657 dirty:286 writeback:1 unstable:0
+   slab_reclaimable:75543 slab_unreclaimable:2509111
+   mapped:81814 shmem:31764 pagetables:370616 bounce:0
+   free:32294031 free_pcp:6233 free_cma:0
+  Node 0 active_anon:254680388kB inactive_anon:1112760kB active_file:240648kB inactive_file:981168kB unevictable:13368kB isolated(anon):0kB isolated(file):0kB mapped:280240kB dirty:1144kB writeback:0kB shmem:95832kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 81225728kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no
+  Node 1 active_anon:250583072kB inactive_anon:5337144kB active_file:84kB inactive_file:0kB unevictable:49260kB isolated(anon):20kB isolated(file):0kB mapped:47016kB dirty:0kB writeback:4kB shmem:31224kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 31897600kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no
+
+The defrag mode is "madvise" and from the above report it is clear that
+the THP has been allocated for MADV_HUGEPAGA vma.
+
+Andrea has identified that the main source of the problem is
+__GFP_THISNODE usage:
+
+: The problem is that direct compaction combined with the NUMA
+: __GFP_THISNODE logic in mempolicy.c is telling reclaim to swap very
+: hard the local node, instead of failing the allocation if there's no
+: THP available in the local node.
+:
+: Such logic was ok until __GFP_THISNODE was added to the THP allocation
+: path even with MPOL_DEFAULT.
+:
+: The idea behind the __GFP_THISNODE addition, is that it is better to
+: provide local memory in PAGE_SIZE units than to use remote NUMA THP
+: backed memory. That largely depends on the remote latency though, on
+: threadrippers for example the overhead is relatively low in my
+: experience.
+:
+: The combination of __GFP_THISNODE and __GFP_DIRECT_RECLAIM results in
+: extremely slow qemu startup with vfio, if the VM is larger than the
+: size of one host NUMA node. This is because it will try very hard to
+: unsuccessfully swapout get_user_pages pinned pages as result of the
+: __GFP_THISNODE being set, instead of falling back to PAGE_SIZE
+: allocations and instead of trying to allocate THP on other nodes (it
+: would be even worse without vfio type1 GUP pins of course, except it'd
+: be swapping heavily instead).
+
+Fix this by removing __GFP_THISNODE for THP requests which are
+requesting the direct reclaim.  This effectivelly reverts 5265047ac301
+on the grounds that the zone/node reclaim was known to be disruptive due
+to premature reclaim when there was memory free.  While it made sense at
+the time for HPC workloads without NUMA awareness on rare machines, it
+was ultimately harmful in the majority of cases.  The existing behaviour
+is similar, if not as widespare as it applies to a corner case but
+crucially, it cannot be tuned around like zone_reclaim_mode can.  The
+default behaviour should always be to cause the least harm for the
+common case.
+
+If there are specialised use cases out there that want zone_reclaim_mode
+in specific cases, then it can be built on top.  Longterm we should
+consider a memory policy which allows for the node reclaim like behavior
+for the specific memory ranges which would allow a
+
+[1] http://lkml.kernel.org/r/20180820032204.9591-1-aarcange@redhat.com
+
+Mel said:
+
+: Both patches look correct to me but I'm responding to this one because
+: it's the fix.  The change makes sense and moves further away from the
+: severe stalling behaviour we used to see with both THP and zone reclaim
+: mode.
+:
+: I put together a basic experiment with usemem configured to reference a
+: buffer multiple times that is 80% the size of main memory on a 2-socket
+: box with symmetric node sizes and defrag set to "always".  The defrag
+: setting is not the default but it would be functionally similar to
+: accessing a buffer with madvise(MADV_HUGEPAGE).  Usemem is configured to
+: reference the buffer multiple times and while it's not an interesting
+: workload, it would be expected to complete reasonably quickly as it fits
+: within memory.  The results were;
+:
+: usemem
+:                                   vanilla           noreclaim-v1
+: Amean     Elapsd-1       42.78 (   0.00%)       26.87 (  37.18%)
+: Amean     Elapsd-3       27.55 (   0.00%)        7.44 (  73.00%)
+: Amean     Elapsd-4        5.72 (   0.00%)        5.69 (   0.45%)
+:
+: This shows the elapsed time in seconds for 1 thread, 3 threads and 4
+: threads referencing buffers 80% the size of memory.  With the patches
+: applied, it's 37.18% faster for the single thread and 73% faster with two
+: threads.  Note that 4 threads showing little difference does not indicate
+: the problem is related to thread counts.  It's simply the case that 4
+: threads gets spread so their workload mostly fits in one node.
+:
+: The overall view from /proc/vmstats is more startling
+:
+:                          4.19.0-rc1  4.19.0-rc1
+:                             vanillanoreclaim-v1r1
+: Minor Faults               35593425      708164
+: Major Faults                 484088          36
+: Swap Ins                    3772837           0
+: Swap Outs                   3932295           0
+:
+: Massive amounts of swap in/out without the patch
+:
+: Direct pages scanned        6013214           0
+: Kswapd pages scanned              0           0
+: Kswapd pages reclaimed            0           0
+: Direct pages reclaimed      4033009           0
+:
+: Lots of reclaim activity without the patch
+:
+: Kswapd efficiency              100%        100%
+: Kswapd velocity               0.000       0.000
+: Direct efficiency               67%        100%
+: Direct velocity           11191.956       0.000
+:
+: Mostly from direct reclaim context as you'd expect without the patch.
+:
+: Page writes by reclaim  3932314.000       0.000
+: Page writes file                 19           0
+: Page writes anon            3932295           0
+: Page reclaim immediate        42336           0
+:
+: Writes from reclaim context is never good but the patch eliminates it.
+:
+: We should never have default behaviour to thrash the system for such a
+: basic workload.  If zone reclaim mode behaviour is ever desired but on a
+: single task instead of a global basis then the sensible option is to build
+: a mempolicy that enforces that behaviour.
+
+This was a severe regression compared to previous kernels that made
+important workloads unusable and it starts when __GFP_THISNODE was
+added to THP allocations under MADV_HUGEPAGE.  It is not a significant
+risk to go to the previous behavior before __GFP_THISNODE was added, it
+worked like that for years.
+
+This was simply an optimization to some lucky workloads that can fit in
+a single node, but it ended up breaking the VM for others that can't
+possibly fit in a single node, so going back is safe.
+
+[mhocko@suse.com: rewrote the changelog based on the one from Andrea]
+Link: http://lkml.kernel.org/r/20180925120326.24392-2-mhocko@kernel.org
+Fixes: 5265047ac301 ("mm, thp: really limit transparent hugepage allocation to local node")
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Reported-by: Stefan Priebe <s.priebe@profihost.ag>
+Debugged-by: Andrea Arcangeli <aarcange@redhat.com>
+Reported-by: Alex Williamson <alex.williamson@redhat.com>
+Reviewed-by: Mel Gorman <mgorman@techsingularity.net>
+Tested-by: Mel Gorman <mgorman@techsingularity.net>
+Cc: Zi Yan <zi.yan@cs.rutgers.edu>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: David Rientjes <rientjes@google.com>
+Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
+Cc: <stable@vger.kernel.org>   [4.1+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mempolicy.c |   32 ++++++++++++++++++++++++++++++--
+ 1 file changed, 30 insertions(+), 2 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -2027,8 +2027,36 @@ retry_cpuset:
+               nmask = policy_nodemask(gfp, pol);
+               if (!nmask || node_isset(hpage_node, *nmask)) {
+                       mpol_cond_put(pol);
+-                      page = __alloc_pages_node(hpage_node,
+-                                              gfp | __GFP_THISNODE, order);
++                      /*
++                       * We cannot invoke reclaim if __GFP_THISNODE
++                       * is set. Invoking reclaim with
++                       * __GFP_THISNODE set, would cause THP
++                       * allocations to trigger heavy swapping
++                       * despite there may be tons of free memory
++                       * (including potentially plenty of THP
++                       * already available in the buddy) on all the
++                       * other NUMA nodes.
++                       *
++                       * At most we could invoke compaction when
++                       * __GFP_THISNODE is set (but we would need to
++                       * refrain from invoking reclaim even if
++                       * compaction returned COMPACT_SKIPPED because
++                       * there wasn't not enough memory to succeed
++                       * compaction). For now just avoid
++                       * __GFP_THISNODE instead of limiting the
++                       * allocation path to a strict and single
++                       * compaction invocation.
++                       *
++                       * Supposedly if direct reclaim was enabled by
++                       * the caller, the app prefers THP regardless
++                       * of the node it comes from so this would be
++                       * more desiderable behavior than only
++                       * providing THP originated from the local
++                       * node in such case.
++                       */
++                      if (!(gfp & __GFP_DIRECT_RECLAIM))
++                              gfp |= __GFP_THISNODE;
++                      page = __alloc_pages_node(hpage_node, gfp, order);
+                       goto out;
+               }
+       }
diff --git a/queue-4.9/mtd-docg3-don-t-set-conflicting-bch_const_params-option.patch b/queue-4.9/mtd-docg3-don-t-set-conflicting-bch_const_params-option.patch

new file mode 100644 (file)

index 0000000..c68d95a
--- /dev/null
+++ b/queue-4.9/mtd-docg3-don-t-set-conflicting-bch_const_params-option.patch
@@ -0,0 +1,51 @@
+From be2e1c9dcf76886a83fb1c433a316e26d4ca2550 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Thu, 11 Oct 2018 13:06:16 +0200
+Subject: mtd: docg3: don't set conflicting BCH_CONST_PARAMS option
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit be2e1c9dcf76886a83fb1c433a316e26d4ca2550 upstream.
+
+I noticed during the creation of another bugfix that the BCH_CONST_PARAMS
+option that is set by DOCG3 breaks setting variable parameters for any
+other users of the BCH library code.
+
+The only other user we have today is the MTD_NAND software BCH
+implementation (most flash controllers use hardware BCH these days
+and are not affected). I considered removing BCH_CONST_PARAMS entirely
+because of the inherent conflict, but according to the description in
+lib/bch.c there is a significant performance benefit in keeping it.
+
+To avoid the immediate problem of the conflict between MTD_NAND_BCH
+and DOCG3, this only sets the constant parameters if MTD_NAND_BCH
+is disabled, which should fix the problem for all cases that
+are affected. This should also work for all stable kernels.
+
+Note that there is only one machine that actually seems to use the
+DOCG3 driver (arch/arm/mach-pxa/mioa701.c), so most users should have
+the driver disabled, but it almost certainly shows up if we wanted
+to test random kernels on machines that use software BCH in MTD.
+
+Fixes: d13d19ece39f ("mtd: docg3: add ECC correction code")
+Cc: stable@vger.kernel.org
+Cc: Robert Jarzmik <robert.jarzmik@free.fr>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mtd/devices/Kconfig |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/mtd/devices/Kconfig
++++ b/drivers/mtd/devices/Kconfig
+@@ -196,7 +196,7 @@ comment "Disk-On-Chip Device Drivers"
+ config MTD_DOCG3
+       tristate "M-Systems Disk-On-Chip G3"
+       select BCH
+-      select BCH_CONST_PARAMS
++      select BCH_CONST_PARAMS if !MTD_NAND_BCH
+       select BITREVERSE
+       ---help---
+         This provides an MTD device driver for the M-Systems DiskOnChip
diff --git a/queue-4.9/netfilter-conntrack-fix-calculation-of-next-bucket-number-in-early_drop.patch b/queue-4.9/netfilter-conntrack-fix-calculation-of-next-bucket-number-in-early_drop.patch

new file mode 100644 (file)

index 0000000..73d9dd8
--- /dev/null
+++ b/queue-4.9/netfilter-conntrack-fix-calculation-of-next-bucket-number-in-early_drop.patch
@@ -0,0 +1,59 @@
+From f393808dc64149ccd0e5a8427505ba2974a59854 Mon Sep 17 00:00:00 2001
+From: Vasily Khoruzhick <vasilykh@arista.com>
+Date: Thu, 25 Oct 2018 12:15:43 -0700
+Subject: netfilter: conntrack: fix calculation of next bucket number in early_drop
+
+From: Vasily Khoruzhick <vasilykh@arista.com>
+
+commit f393808dc64149ccd0e5a8427505ba2974a59854 upstream.
+
+If there's no entry to drop in bucket that corresponds to the hash,
+early_drop() should look for it in other buckets. But since it increments
+hash instead of bucket number, it actually looks in the same bucket 8
+times: hsize is 16k by default (14 bits) and hash is 32-bit value, so
+reciprocal_scale(hash, hsize) returns the same value for hash..hash+7 in
+most cases.
+
+Fix it by increasing bucket number instead of hash and rename _hash
+to bucket to avoid future confusion.
+
+Fixes: 3e86638e9a0b ("netfilter: conntrack: consider ct netns in early_drop logic")
+Cc: <stable@vger.kernel.org> # v4.7+
+Signed-off-by: Vasily Khoruzhick <vasilykh@arista.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conntrack_core.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -918,19 +918,22 @@ static unsigned int early_drop_list(stru
+       return drops;
+ }
+ 
+-static noinline int early_drop(struct net *net, unsigned int _hash)
++static noinline int early_drop(struct net *net, unsigned int hash)
+ {
+-      unsigned int i;
++      unsigned int i, bucket;
+ 
+       for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
+               struct hlist_nulls_head *ct_hash;
+-              unsigned int hash, hsize, drops;
++              unsigned int hsize, drops;
+ 
+               rcu_read_lock();
+               nf_conntrack_get_ht(&ct_hash, &hsize);
+-              hash = reciprocal_scale(_hash++, hsize);
++              if (!i)
++                      bucket = reciprocal_scale(hash, hsize);
++              else
++                      bucket = (bucket + 1) % hsize;
+ 
+-              drops = early_drop_list(net, &ct_hash[hash]);
++              drops = early_drop_list(net, &ct_hash[bucket]);
+               rcu_read_unlock();
+ 
+               if (drops) {
diff --git a/queue-4.9/ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch b/queue-4.9/ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch

new file mode 100644 (file)

index 0000000..aca9e69
--- /dev/null
+++ b/queue-4.9/ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch
@@ -0,0 +1,54 @@
+From 29aa30167a0a2e6045a0d6d2e89d8168132333d5 Mon Sep 17 00:00:00 2001
+From: Changwei Ge <ge.changwei@h3c.com>
+Date: Fri, 2 Nov 2018 15:48:15 -0700
+Subject: ocfs2: fix a misuse a of brelse after failing ocfs2_check_dir_entry
+
+From: Changwei Ge <ge.changwei@h3c.com>
+
+commit 29aa30167a0a2e6045a0d6d2e89d8168132333d5 upstream.
+
+Somehow, file system metadata was corrupted, which causes
+ocfs2_check_dir_entry() to fail in function ocfs2_dir_foreach_blk_el().
+
+According to the original design intention, if above happens we should
+skip the problematic block and continue to retrieve dir entry.  But
+there is obviouse misuse of brelse around related code.
+
+After failure of ocfs2_check_dir_entry(), current code just moves to
+next position and uses the problematic buffer head again and again
+during which the problematic buffer head is released for multiple times.
+I suppose, this a serious issue which is long-lived in ocfs2.  This may
+cause other file systems which is also used in a the same host insane.
+
+So we should also consider about bakcporting this patch into linux
+-stable.
+
+Link: http://lkml.kernel.org/r/HK2PR06MB045211675B43EED794E597B6D56E0@HK2PR06MB0452.apcprd06.prod.outlook.com
+Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
+Suggested-by: Changkuo Shi <shi.changkuo@h3c.com>
+Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Joseph Qi <jiangqi903@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dir.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/ocfs2/dir.c
++++ b/fs/ocfs2/dir.c
+@@ -1896,8 +1896,7 @@ static int ocfs2_dir_foreach_blk_el(stru
+                               /* On error, skip the f_pos to the
+                                  next block. */
+                               ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
+-                              brelse(bh);
+-                              continue;
++                              break;
+                       }
+                       if (le64_to_cpu(de->inode)) {
+                               unsigned char d_type = DT_UNKNOWN;
diff --git a/queue-4.9/of-numa-validate-some-distance-map-rules.patch b/queue-4.9/of-numa-validate-some-distance-map-rules.patch

new file mode 100644 (file)

index 0000000..79f4be7
--- /dev/null
+++ b/queue-4.9/of-numa-validate-some-distance-map-rules.patch
@@ -0,0 +1,80 @@
+From 89c38422e072bb453e3045b8f1b962a344c3edea Mon Sep 17 00:00:00 2001
+From: John Garry <john.garry@huawei.com>
+Date: Thu, 8 Nov 2018 18:17:03 +0800
+Subject: of, numa: Validate some distance map rules
+
+From: John Garry <john.garry@huawei.com>
+
+commit 89c38422e072bb453e3045b8f1b962a344c3edea upstream.
+
+Currently the NUMA distance map parsing does not validate the distance
+table for the distance-matrix rules 1-2 in [1].
+
+However the arch NUMA code may enforce some of these rules, but not all.
+Such is the case for the arm64 port, which does not enforce the rule that
+the distance between separates nodes cannot equal LOCAL_DISTANCE.
+
+The patch adds the following rules validation:
+- distance of node to self equals LOCAL_DISTANCE
+- distance of separate nodes > LOCAL_DISTANCE
+
+This change avoids a yet-unresolved crash reported in [2].
+
+A note on dealing with symmetrical distances between nodes:
+
+Validating symmetrical distances between nodes is difficult. If it were
+mandated in the bindings that every distance must be recorded in the
+table, then it would be easy. However, it isn't.
+
+In addition to this, it is also possible to record [b, a] distance only
+(and not [a, b]). So, when processing the table for [b, a], we cannot
+assert that current distance of [a, b] != [b, a] as invalid, as [a, b]
+distance may not be present in the table and current distance would be
+default at REMOTE_DISTANCE.
+
+As such, we maintain the policy that we overwrite distance [a, b] = [b, a]
+for b > a. This policy is different to kernel ACPI SLIT validation, which
+allows non-symmetrical distances (ACPI spec SLIT rules allow it). However,
+the distance debug message is dropped as it may be misleading (for a distance
+which is later overwritten).
+
+Some final notes on semantics:
+
+- It is implied that it is the responsibility of the arch NUMA code to
+  reset the NUMA distance map for an error in distance map parsing.
+
+- It is the responsibility of the FW NUMA topology parsing (whether OF or
+  ACPI) to enforce NUMA distance rules, and not arch NUMA code.
+
+[1] Documents/devicetree/bindings/numa.txt
+[2] https://www.spinics.net/lists/arm-kernel/msg683304.html
+
+Cc: stable@vger.kernel.org # 4.7
+Signed-off-by: John Garry <john.garry@huawei.com>
+Acked-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Rob Herring <robh@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/of/of_numa.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/drivers/of/of_numa.c
++++ b/drivers/of/of_numa.c
+@@ -126,9 +126,14 @@ static int __init of_numa_parse_distance
+               distance = of_read_number(matrix, 1);
+               matrix++;
+ 
++              if ((nodea == nodeb && distance != LOCAL_DISTANCE) ||
++                  (nodea != nodeb && distance <= LOCAL_DISTANCE)) {
++                      pr_err("Invalid distance[node%d -> node%d] = %d\n",
++                             nodea, nodeb, distance);
++                      return -EINVAL;
++              }
++
+               numa_set_distance(nodea, nodeb, distance);
+-              pr_debug("distance[node%d -> node%d] = %d\n",
+-                       nodea, nodeb, distance);
+ 
+               /* Set default distance of node B->A same as A->B */
+               if (nodeb > nodea)
diff --git a/queue-4.9/reset-hisilicon-fix-potential-null-pointer-dereference.patch b/queue-4.9/reset-hisilicon-fix-potential-null-pointer-dereference.patch

new file mode 100644 (file)

index 0000000..f1d79e6
--- /dev/null
+++ b/queue-4.9/reset-hisilicon-fix-potential-null-pointer-dereference.patch
@@ -0,0 +1,42 @@
+From e9a2310fb689151166df7fd9971093362d34bd79 Mon Sep 17 00:00:00 2001
+From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
+Date: Wed, 25 Jul 2018 19:47:19 -0500
+Subject: reset: hisilicon: fix potential NULL pointer dereference
+
+From: Gustavo A. R. Silva <gustavo@embeddedor.com>
+
+commit e9a2310fb689151166df7fd9971093362d34bd79 upstream.
+
+There is a potential execution path in which function
+platform_get_resource() returns NULL. If this happens,
+we will end up having a NULL pointer dereference.
+
+Fix this by replacing devm_ioremap with devm_ioremap_resource,
+which has the NULL check and the memory region request.
+
+This code was detected with the help of Coccinelle.
+
+Cc: stable@vger.kernel.org
+Fixes: 97b7129cd2af ("reset: hisilicon: change the definition of hisi_reset_init")
+Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
+Signed-off-by: Stephen Boyd <sboyd@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/clk/hisilicon/reset.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/clk/hisilicon/reset.c
++++ b/drivers/clk/hisilicon/reset.c
+@@ -109,9 +109,8 @@ struct hisi_reset_controller *hisi_reset
+               return NULL;
+ 
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+-      rstc->membase = devm_ioremap(&pdev->dev,
+-                              res->start, resource_size(res));
+-      if (!rstc->membase)
++      rstc->membase = devm_ioremap_resource(&pdev->dev, res);
++      if (IS_ERR(rstc->membase))
+               return NULL;
+ 
+       spin_lock_init(&rstc->lock);
diff --git a/queue-4.9/revert-ceph-fix-dentry-leak-in-splice_dentry.patch b/queue-4.9/revert-ceph-fix-dentry-leak-in-splice_dentry.patch

new file mode 100644 (file)

index 0000000..bc2d7dd
--- /dev/null
+++ b/queue-4.9/revert-ceph-fix-dentry-leak-in-splice_dentry.patch
@@ -0,0 +1,41 @@
+From efe328230dc01aa0b1269aad0b5fae73eea4677a Mon Sep 17 00:00:00 2001
+From: "Yan, Zheng" <zyan@redhat.com>
+Date: Thu, 27 Sep 2018 21:16:05 +0800
+Subject: Revert "ceph: fix dentry leak in splice_dentry()"
+
+From: Yan, Zheng <zyan@redhat.com>
+
+commit efe328230dc01aa0b1269aad0b5fae73eea4677a upstream.
+
+This reverts commit 8b8f53af1ed9df88a4c0fbfdf3db58f62060edf3.
+
+splice_dentry() is used by three places. For two places, req->r_dentry
+is passed to splice_dentry(). In the case of error, req->r_dentry does
+not get updated. So splice_dentry() should not drop reference.
+
+Cc: stable@vger.kernel.org # 4.18+
+Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/inode.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/fs/ceph/inode.c
++++ b/fs/ceph/inode.c
+@@ -1077,8 +1077,12 @@ static struct dentry *splice_dentry(stru
+       if (IS_ERR(realdn)) {
+               pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
+                      PTR_ERR(realdn), dn, in, ceph_vinop(in));
+-              dput(dn);
+-              dn = realdn; /* note realdn contains the error */
++              dn = realdn;
++              /*
++               * Caller should release 'dn' in the case of error.
++               * If 'req->r_dentry' is passed to this function,
++               * caller should leave 'req->r_dentry' untouched.
++               */
+               goto out;
+       } else if (realdn) {
+               dout("dn %p (%d) spliced with %p (%d) "
diff --git a/queue-4.9/series b/queue-4.9/series

index 2c3efd2c8ac98a93c5f703589f9f96665b200fae..406c359ff9916adc7738769ac26aff40dbce384a 100644 (file)
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -28,3 +28,20 @@ parisc-fix-hpmc-handler-by-increasing-size-to-multip.patch
  parisc-fix-exported-address-of-os_hpmc-handler.patch
  mips-loongson-3-fix-cpu-uart-irq-delivery-problem.patch
  mips-loongson-3-fix-bridge-irq-delivery-problem.patch
+xtensa-add-notes-section-to-the-linker-script.patch
+xtensa-make-sure-bflt-stack-is-16-byte-aligned.patch
+xtensa-fix-boot-parameters-address-translation.patch
+clk-s2mps11-fix-matching-when-built-as-module-and-dt-node-contains-compatible.patch
+clk-at91-fix-division-by-zero-in-pll-recalc_rate.patch
+clk-rockchip-fix-static-checker-warning-in-rockchip_ddrclk_get_parent-call.patch
+libceph-bump-ceph_msg_max_data_len.patch
+revert-ceph-fix-dentry-leak-in-splice_dentry.patch
+mach64-fix-display-corruption-on-big-endian-machines.patch
+mach64-fix-image-corruption-due-to-reading-accelerator-registers.patch
+reset-hisilicon-fix-potential-null-pointer-dereference.patch
+vhost-scsi-truncate-t10-pi-iov_iter-to-prot_bytes.patch
+ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch
+mm-thp-relax-__gfp_thisnode-for-madv_hugepage-mappings.patch
+netfilter-conntrack-fix-calculation-of-next-bucket-number-in-early_drop.patch
+mtd-docg3-don-t-set-conflicting-bch_const_params-option.patch
+of-numa-validate-some-distance-map-rules.patch
diff --git a/queue-4.9/vhost-scsi-truncate-t10-pi-iov_iter-to-prot_bytes.patch b/queue-4.9/vhost-scsi-truncate-t10-pi-iov_iter-to-prot_bytes.patch

new file mode 100644 (file)

index 0000000..7f50a92
--- /dev/null
+++ b/queue-4.9/vhost-scsi-truncate-t10-pi-iov_iter-to-prot_bytes.patch
@@ -0,0 +1,47 @@
+From 4542d623c7134bc1738f8a68ccb6dd546f1c264f Mon Sep 17 00:00:00 2001
+From: Greg Edwards <gedwards@ddn.com>
+Date: Wed, 22 Aug 2018 13:21:53 -0600
+Subject: vhost/scsi: truncate T10 PI iov_iter to prot_bytes
+
+From: Greg Edwards <gedwards@ddn.com>
+
+commit 4542d623c7134bc1738f8a68ccb6dd546f1c264f upstream.
+
+Commands with protection information included were not truncating the
+protection iov_iter to the number of protection bytes in the command.
+This resulted in vhost_scsi mis-calculating the size of the protection
+SGL in vhost_scsi_calc_sgls(), and including both the protection and
+data SG entries in the protection SGL.
+
+Fixes: 09b13fa8c1a1 ("vhost/scsi: Add ANY_LAYOUT support in vhost_scsi_handle_vq")
+Signed-off-by: Greg Edwards <gedwards@ddn.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Fixes: 09b13fa8c1a1093e9458549ac8bb203a7c65c62a
+Cc: stable@vger.kernel.org
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/vhost/scsi.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/vhost/scsi.c
++++ b/drivers/vhost/scsi.c
+@@ -999,7 +999,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *
+                               prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin);
+                       }
+                       /*
+-                       * Set prot_iter to data_iter, and advance past any
++                       * Set prot_iter to data_iter and truncate it to
++                       * prot_bytes, and advance data_iter past any
+                        * preceeding prot_bytes that may be present.
+                        *
+                        * Also fix up the exp_data_len to reflect only the
+@@ -1008,6 +1009,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *
+                       if (prot_bytes) {
+                               exp_data_len -= prot_bytes;
+                               prot_iter = data_iter;
++                              iov_iter_truncate(&prot_iter, prot_bytes);
+                               iov_iter_advance(&data_iter, prot_bytes);
+                       }
+                       tag = vhost64_to_cpu(vq, v_req_pi.tag);
diff --git a/queue-4.9/xtensa-add-notes-section-to-the-linker-script.patch b/queue-4.9/xtensa-add-notes-section-to-the-linker-script.patch

new file mode 100644 (file)

index 0000000..8f6abcb
--- /dev/null
+++ b/queue-4.9/xtensa-add-notes-section-to-the-linker-script.patch
@@ -0,0 +1,44 @@
+From 4119ba211bc4f1bf638f41e50b7a0f329f58aa16 Mon Sep 17 00:00:00 2001
+From: Max Filippov <jcmvbkbc@gmail.com>
+Date: Mon, 29 Oct 2018 18:30:13 -0700
+Subject: xtensa: add NOTES section to the linker script
+
+From: Max Filippov <jcmvbkbc@gmail.com>
+
+commit 4119ba211bc4f1bf638f41e50b7a0f329f58aa16 upstream.
+
+This section collects all source .note.* sections together in the
+vmlinux image. Without it .note.Linux section may be placed at address
+0, while the rest of the kernel is at its normal address, resulting in a
+huge vmlinux.bin image that may not be linked into the xtensa Image.elf.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/xtensa/boot/Makefile        |    2 +-
+ arch/xtensa/kernel/vmlinux.lds.S |    1 +
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/xtensa/boot/Makefile
++++ b/arch/xtensa/boot/Makefile
+@@ -31,7 +31,7 @@ $(bootdir-y): $(addprefix $(obj)/,$(subd
+             $(addprefix $(obj)/,$(host-progs))
+       $(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS)
+ 
+-OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary
++OBJCOPYFLAGS = --strip-all -R .comment -R .notes -O binary
+ 
+ vmlinux.bin: vmlinux FORCE
+       $(call if_changed,objcopy)
+--- a/arch/xtensa/kernel/vmlinux.lds.S
++++ b/arch/xtensa/kernel/vmlinux.lds.S
+@@ -109,6 +109,7 @@ SECTIONS
+   .fixup   : { *(.fixup) }
+ 
+   EXCEPTION_TABLE(16)
++  NOTES
+   /* Data section */
+ 
+   _sdata = .;
diff --git a/queue-4.9/xtensa-fix-boot-parameters-address-translation.patch b/queue-4.9/xtensa-fix-boot-parameters-address-translation.patch

new file mode 100644 (file)

index 0000000..906f720
--- /dev/null
+++ b/queue-4.9/xtensa-fix-boot-parameters-address-translation.patch
@@ -0,0 +1,43 @@
+From 40dc948f234b73497c3278875eb08a01d5854d3f Mon Sep 17 00:00:00 2001
+From: Max Filippov <jcmvbkbc@gmail.com>
+Date: Tue, 13 Nov 2018 23:46:42 -0800
+Subject: xtensa: fix boot parameters address translation
+
+From: Max Filippov <jcmvbkbc@gmail.com>
+
+commit 40dc948f234b73497c3278875eb08a01d5854d3f upstream.
+
+The bootloader may pass physical address of the boot parameters structure
+to the MMUv3 kernel in the register a2. Code in the _SetupMMU block in
+the arch/xtensa/kernel/head.S is supposed to map that physical address to
+the virtual address in the configured virtual memory layout.
+
+This code haven't been updated when additional 256+256 and 512+512
+memory layouts were introduced and it may produce wrong addresses when
+used with these layouts.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/xtensa/kernel/head.S |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/xtensa/kernel/head.S
++++ b/arch/xtensa/kernel/head.S
+@@ -88,9 +88,12 @@ _SetupMMU:
+       initialize_mmu
+ #if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY
+       rsr     a2, excsave1
+-      movi    a3, 0x08000000
++      movi    a3, XCHAL_KSEG_PADDR
++      bltu    a2, a3, 1f
++      sub     a2, a2, a3
++      movi    a3, XCHAL_KSEG_SIZE
+       bgeu    a2, a3, 1f
+-      movi    a3, 0xd0000000
++      movi    a3, XCHAL_KSEG_CACHED_VADDR
+       add     a2, a2, a3
+       wsr     a2, excsave1
+ 1:
diff --git a/queue-4.9/xtensa-make-sure-bflt-stack-is-16-byte-aligned.patch b/queue-4.9/xtensa-make-sure-bflt-stack-is-16-byte-aligned.patch

new file mode 100644 (file)

index 0000000..9b0d585
--- /dev/null
+++ b/queue-4.9/xtensa-make-sure-bflt-stack-is-16-byte-aligned.patch
@@ -0,0 +1,45 @@
+From 0773495b1f5f1c5e23551843f87b5ff37e7af8f7 Mon Sep 17 00:00:00 2001
+From: Max Filippov <jcmvbkbc@gmail.com>
+Date: Sun, 4 Nov 2018 01:46:00 -0700
+Subject: xtensa: make sure bFLT stack is 16 byte aligned
+
+From: Max Filippov <jcmvbkbc@gmail.com>
+
+commit 0773495b1f5f1c5e23551843f87b5ff37e7af8f7 upstream.
+
+Xtensa ABI requires stack alignment to be at least 16. In noMMU
+configuration ARCH_SLAB_MINALIGN is used to align stack. Make it at
+least 16.
+
+This fixes the following runtime error in noMMU configuration, caused by
+interaction between insufficiently aligned stack and alloca function,
+that results in corruption of on-stack variable in the libc function
+glob:
+
+ Caught unhandled exception in 'sh' (pid = 47, pc = 0x02d05d65)
+  - should not happen
+  EXCCAUSE is 15
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/xtensa/include/asm/processor.h |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/xtensa/include/asm/processor.h
++++ b/arch/xtensa/include/asm/processor.h
+@@ -24,7 +24,11 @@
+ # error Linux requires the Xtensa Windowed Registers Option.
+ #endif
+ 
+-#define ARCH_SLAB_MINALIGN    XCHAL_DATA_WIDTH
++/* Xtensa ABI requires stack alignment to be at least 16 */
++
++#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16)
++
++#define ARCH_SLAB_MINALIGN STACK_ALIGN
+ 
+ /*
+  * User space process size: 1 GB.
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 19 Nov 2018 11:06:08 +0000 (12:06 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 19 Nov 2018 11:06:08 +0000 (12:06 +0100)
queue-4.9/clk-at91-fix-division-by-zero-in-pll-recalc_rate.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/clk-rockchip-fix-static-checker-warning-in-rockchip_ddrclk_get_parent-call.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/clk-s2mps11-fix-matching-when-built-as-module-and-dt-node-contains-compatible.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/libceph-bump-ceph_msg_max_data_len.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/mach64-fix-display-corruption-on-big-endian-machines.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/mach64-fix-image-corruption-due-to-reading-accelerator-registers.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/mm-thp-relax-__gfp_thisnode-for-madv_hugepage-mappings.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/mtd-docg3-don-t-set-conflicting-bch_const_params-option.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/netfilter-conntrack-fix-calculation-of-next-bucket-number-in-early_drop.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/of-numa-validate-some-distance-map-rules.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/reset-hisilicon-fix-potential-null-pointer-dereference.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/revert-ceph-fix-dentry-leak-in-splice_dentry.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/series		patch \| blob \| blame \| history
queue-4.9/vhost-scsi-truncate-t10-pi-iov_iter-to-prot_bytes.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/xtensa-add-notes-section-to-the-linker-script.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/xtensa-fix-boot-parameters-address-translation.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/xtensa-make-sure-bflt-stack-is-16-byte-aligned.patch	[new file with mode: 0644]	patch \| blob