git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 26 Sep 2015 17:53:12 +0000 (10:53 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 26 Sep 2015 17:53:12 +0000 (10:53 -0700)
added patches:
hfs-fix-b-tree-corruption-after-insertion-at-position-0.patch
hfs-hfsplus-cache-pages-correctly-between-bnode_create-and-bnode_free.patch
ib-mlx4-forbid-using-sysfs-to-change-roce-pkeys.patch
ib-mlx4-use-correct-sl-on-ah-query-under-roce.patch
ib-qib-change-lkey-table-allocation-to-support-more-mrs.patch
ib-uverbs-fix-race-between-ib_uverbs_open-and-remove_one.patch
ib-uverbs-reject-invalid-or-unknown-opcodes.patch
stmmac-fix-check-for-phydev-being-open.patch
stmmac-troubleshoot-unexpected-bits-in-des0-des1.patch

queue-3.14/hfs-fix-b-tree-corruption-after-insertion-at-position-0.patch [new file with mode: 0644]
queue-3.14/hfs-hfsplus-cache-pages-correctly-between-bnode_create-and-bnode_free.patch [new file with mode: 0644]
queue-3.14/ib-mlx4-forbid-using-sysfs-to-change-roce-pkeys.patch [new file with mode: 0644]
queue-3.14/ib-mlx4-use-correct-sl-on-ah-query-under-roce.patch [new file with mode: 0644]
queue-3.14/ib-qib-change-lkey-table-allocation-to-support-more-mrs.patch [new file with mode: 0644]
queue-3.14/ib-uverbs-fix-race-between-ib_uverbs_open-and-remove_one.patch [new file with mode: 0644]
queue-3.14/ib-uverbs-reject-invalid-or-unknown-opcodes.patch [new file with mode: 0644]
queue-3.14/series
queue-3.14/stmmac-fix-check-for-phydev-being-open.patch [new file with mode: 0644]
queue-3.14/stmmac-troubleshoot-unexpected-bits-in-des0-des1.patch [new file with mode: 0644]

diff --git a/queue-3.14/hfs-fix-b-tree-corruption-after-insertion-at-position-0.patch b/queue-3.14/hfs-fix-b-tree-corruption-after-insertion-at-position-0.patch
new file mode 100644 (file)
index 0000000..5ced0dd
--- /dev/null
@@ -0,0 +1,77 @@
+From b4cc0efea4f0bfa2477c56af406cfcf3d3e58680 Mon Sep 17 00:00:00 2001
+From: Hin-Tak Leung <htl10@users.sourceforge.net>
+Date: Wed, 9 Sep 2015 15:38:07 -0700
+Subject: hfs: fix B-tree corruption after insertion at position 0
+
+From: Hin-Tak Leung <htl10@users.sourceforge.net>
+
+commit b4cc0efea4f0bfa2477c56af406cfcf3d3e58680 upstream.
+
+Fix B-tree corruption when a new record is inserted at position 0 in the
+node in hfs_brec_insert().
+
+This is an identical change to the corresponding hfs b-tree code to Sergei
+Antonov's "hfsplus: fix B-tree corruption after insertion at position 0",
+to keep similar code paths in the hfs and hfsplus drivers in sync, where
+appropriate.
+
+Signed-off-by: Hin-Tak Leung <htl10@users.sourceforge.net>
+Cc: Sergei Antonov <saproj@gmail.com>
+Cc: Joe Perches <joe@perches.com>
+Reviewed-by: Vyacheslav Dubeyko <slava@dubeyko.com>
+Cc: Anton Altaparmakov <anton@tuxera.com>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/hfs/brec.c |   20 +++++++++++---------
+ 1 file changed, 11 insertions(+), 9 deletions(-)
+
+--- a/fs/hfs/brec.c
++++ b/fs/hfs/brec.c
+@@ -131,13 +131,16 @@ skip:
+       hfs_bnode_write(node, entry, data_off + key_len, entry_len);
+       hfs_bnode_dump(node);
+-      if (new_node) {
+-              /* update parent key if we inserted a key
+-               * at the start of the first node
+-               */
+-              if (!rec && new_node != node)
+-                      hfs_brec_update_parent(fd);
++      /*
++       * update parent key if we inserted a key
++       * at the start of the node and it is not the new node
++       */
++      if (!rec && new_node != node) {
++              hfs_bnode_read_key(node, fd->search_key, data_off + size);
++              hfs_brec_update_parent(fd);
++      }
++      if (new_node) {
+               hfs_bnode_put(fd->bnode);
+               if (!new_node->parent) {
+                       hfs_btree_inc_height(tree);
+@@ -166,9 +169,6 @@ skip:
+               goto again;
+       }
+-      if (!rec)
+-              hfs_brec_update_parent(fd);
+-
+       return 0;
+ }
+@@ -366,6 +366,8 @@ again:
+       if (IS_ERR(parent))
+               return PTR_ERR(parent);
+       __hfs_brec_find(parent, fd);
++      if (fd->record < 0)
++              return -ENOENT;
+       hfs_bnode_dump(parent);
+       rec = fd->record;
diff --git a/queue-3.14/hfs-hfsplus-cache-pages-correctly-between-bnode_create-and-bnode_free.patch b/queue-3.14/hfs-hfsplus-cache-pages-correctly-between-bnode_create-and-bnode_free.patch
new file mode 100644 (file)
index 0000000..36d8bcc
--- /dev/null
@@ -0,0 +1,180 @@
+From 7cb74be6fd827e314f81df3c5889b87e4c87c569 Mon Sep 17 00:00:00 2001
+From: Hin-Tak Leung <htl10@users.sourceforge.net>
+Date: Wed, 9 Sep 2015 15:38:04 -0700
+Subject: hfs,hfsplus: cache pages correctly between bnode_create and bnode_free
+
+From: Hin-Tak Leung <htl10@users.sourceforge.net>
+
+commit 7cb74be6fd827e314f81df3c5889b87e4c87c569 upstream.
+
+Pages looked up by __hfs_bnode_create() (called by hfs_bnode_create() and
+hfs_bnode_find() for finding or creating pages corresponding to an inode)
+are immediately kmap()'ed and used (both read and write) and kunmap()'ed,
+and should not be page_cache_release()'ed until hfs_bnode_free().
+
+This patch fixes a problem I first saw in July 2012: merely running "du"
+on a large hfsplus-mounted directory a few times on a reasonably loaded
+system would get the hfsplus driver all confused and complaining about
+B-tree inconsistencies, and generates a "BUG: Bad page state".  Most
+recently, I can generate this problem on up-to-date Fedora 22 with shipped
+kernel 4.0.5, by running "du /" (="/" + "/home" + "/mnt" + other smaller
+mounts) and "du /mnt" simultaneously on two windows, where /mnt is a
+lightly-used QEMU VM image of the full Mac OS X 10.9:
+
+$ df -i / /home /mnt
+Filesystem                  Inodes   IUsed      IFree IUse% Mounted on
+/dev/mapper/fedora-root    3276800  551665    2725135   17% /
+/dev/mapper/fedora-home   52879360  716221   52163139    2% /home
+/dev/nbd0p2             4294967295 1387818 4293579477    1% /mnt
+
+After applying the patch, I was able to run "du /" (60+ times) and "du
+/mnt" (150+ times) continuously and simultaneously for 6+ hours.
+
+There are many reports of the hfsplus driver getting confused under load
+and generating "BUG: Bad page state" or other similar issues over the
+years.  [1]
+
+The unpatched code [2] has always been wrong since it entered the kernel
+tree.  The only reason why it gets away with it is that the
+kmap/memcpy/kunmap follow very quickly after the page_cache_release() so
+the kernel has not had a chance to reuse the memory for something else,
+most of the time.
+
+The current RW driver appears to have followed the design and development
+of the earlier read-only hfsplus driver [3], whereby version 0.1 (Dec
+2001) had a B-tree node-centric approach to
+read_cache_page()/page_cache_release() per bnode_get()/bnode_put(),
+migrating towards version 0.2 (June 2002) of caching and releasing pages
+per inode extents.  When the current RW code first entered the kernel [2]
+in 2005, there was an REF_PAGES conditional (and "//" commented out code)
+to switch between B-node centric paging to inode-centric paging.  There
+was a mistake with the direction of one of the REF_PAGES conditionals in
+__hfs_bnode_create().  In a subsequent "remove debug code" commit [4], the
+read_cache_page()/page_cache_release() per bnode_get()/bnode_put() were
+removed, but a page_cache_release() was mistakenly left in (propagating
+the "REF_PAGES <-> !REF_PAGE" mistake), and the commented-out
+page_cache_release() in bnode_release() (which should be spanned by
+!REF_PAGES) was never enabled.
+
+References:
+[1]:
+Michael Fox, Apr 2013
+http://www.spinics.net/lists/linux-fsdevel/msg63807.html
+("hfsplus volume suddenly inaccessable after 'hfs: recoff %d too large'")
+
+Sasha Levin, Feb 2015
+http://lkml.org/lkml/2015/2/20/85 ("use after free")
+
+https://bugs.launchpad.net/ubuntu/+source/linux/+bug/740814
+https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1027887
+https://bugzilla.kernel.org/show_bug.cgi?id=42342
+https://bugzilla.kernel.org/show_bug.cgi?id=63841
+https://bugzilla.kernel.org/show_bug.cgi?id=78761
+
+[2]:
+http://git.kernel.org/cgit/linux/kernel/git/tglx/history.git/commit/\
+fs/hfs/bnode.c?id=d1081202f1d0ee35ab0beb490da4b65d4bc763db
+commit d1081202f1d0ee35ab0beb490da4b65d4bc763db
+Author: Andrew Morton <akpm@osdl.org>
+Date:   Wed Feb 25 16:17:36 2004 -0800
+
+    [PATCH] HFS rewrite
+
+http://git.kernel.org/cgit/linux/kernel/git/tglx/history.git/commit/\
+fs/hfsplus/bnode.c?id=91556682e0bf004d98a529bf829d339abb98bbbd
+
+commit 91556682e0bf004d98a529bf829d339abb98bbbd
+Author: Andrew Morton <akpm@osdl.org>
+Date:   Wed Feb 25 16:17:48 2004 -0800
+
+    [PATCH] HFS+ support
+
+[3]:
+http://sourceforge.net/projects/linux-hfsplus/
+
+http://sourceforge.net/projects/linux-hfsplus/files/Linux%202.4.x%20patch/hfsplus%200.1/
+http://sourceforge.net/projects/linux-hfsplus/files/Linux%202.4.x%20patch/hfsplus%200.2/
+
+http://linux-hfsplus.cvs.sourceforge.net/viewvc/linux-hfsplus/linux/\
+fs/hfsplus/bnode.c?r1=1.4&r2=1.5
+
+Date:   Thu Jun 6 09:45:14 2002 +0000
+Use buffer cache instead of page cache in bnode.c. Cache inode extents.
+
+[4]:
+http://git.kernel.org/cgit/linux/kernel/git/\
+stable/linux-stable.git/commit/?id=a5e3985fa014029eb6795664c704953720cc7f7d
+
+commit a5e3985fa014029eb6795664c704953720cc7f7d
+Author: Roman Zippel <zippel@linux-m68k.org>
+Date:   Tue Sep 6 15:18:47 2005 -0700
+
+[PATCH] hfs: remove debug code
+
+Signed-off-by: Hin-Tak Leung <htl10@users.sourceforge.net>
+Signed-off-by: Sergei Antonov <saproj@gmail.com>
+Reviewed-by: Anton Altaparmakov <anton@tuxera.com>
+Reported-by: Sasha Levin <sasha.levin@oracle.com>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Vyacheslav Dubeyko <slava@dubeyko.com>
+Cc: Sougata Santra <sougata@tuxera.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/hfs/bnode.c     |    9 ++++-----
+ fs/hfsplus/bnode.c |    3 ---
+ 2 files changed, 4 insertions(+), 8 deletions(-)
+
+--- a/fs/hfs/bnode.c
++++ b/fs/hfs/bnode.c
+@@ -288,7 +288,6 @@ static struct hfs_bnode *__hfs_bnode_cre
+                       page_cache_release(page);
+                       goto fail;
+               }
+-              page_cache_release(page);
+               node->page[i] = page;
+       }
+@@ -398,11 +397,11 @@ node_error:
+ void hfs_bnode_free(struct hfs_bnode *node)
+ {
+-      //int i;
++      int i;
+-      //for (i = 0; i < node->tree->pages_per_bnode; i++)
+-      //      if (node->page[i])
+-      //              page_cache_release(node->page[i]);
++      for (i = 0; i < node->tree->pages_per_bnode; i++)
++              if (node->page[i])
++                      page_cache_release(node->page[i]);
+       kfree(node);
+ }
+--- a/fs/hfsplus/bnode.c
++++ b/fs/hfsplus/bnode.c
+@@ -456,7 +456,6 @@ static struct hfs_bnode *__hfs_bnode_cre
+                       page_cache_release(page);
+                       goto fail;
+               }
+-              page_cache_release(page);
+               node->page[i] = page;
+       }
+@@ -568,13 +567,11 @@ node_error:
+ void hfs_bnode_free(struct hfs_bnode *node)
+ {
+-#if 0
+       int i;
+       for (i = 0; i < node->tree->pages_per_bnode; i++)
+               if (node->page[i])
+                       page_cache_release(node->page[i]);
+-#endif
+       kfree(node);
+ }
diff --git a/queue-3.14/ib-mlx4-forbid-using-sysfs-to-change-roce-pkeys.patch b/queue-3.14/ib-mlx4-forbid-using-sysfs-to-change-roce-pkeys.patch
new file mode 100644 (file)
index 0000000..cb524dd
--- /dev/null
@@ -0,0 +1,51 @@
+From 2b135db3e81301d0452e6aa107349abe67b097d6 Mon Sep 17 00:00:00 2001
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Thu, 30 Jul 2015 17:34:23 +0300
+Subject: IB/mlx4: Forbid using sysfs to change RoCE pkeys
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+commit 2b135db3e81301d0452e6aa107349abe67b097d6 upstream.
+
+The pkey mapping for RoCE must remain the default mapping:
+VFs:
+  virtual index 0 = mapped to real index 0 (0xFFFF)
+  All other indices: mapped to a real pkey index containing an
+                      invalid pkey.
+PF:
+  virtual index i = real index i.
+
+Don't allow users to change these mappings using files found in
+sysfs.
+
+Fixes: c1e7e466120b ('IB/mlx4: Add iov directory in sysfs under the ib device')
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/mlx4/sysfs.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx4/sysfs.c
++++ b/drivers/infiniband/hw/mlx4/sysfs.c
+@@ -563,6 +563,8 @@ static int add_port(struct mlx4_ib_dev *
+       struct mlx4_port *p;
+       int i;
+       int ret;
++      int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port_num) ==
++                      IB_LINK_LAYER_ETHERNET;
+       p = kzalloc(sizeof *p, GFP_KERNEL);
+       if (!p)
+@@ -580,7 +582,8 @@ static int add_port(struct mlx4_ib_dev *
+       p->pkey_group.name  = "pkey_idx";
+       p->pkey_group.attrs =
+-              alloc_group_attrs(show_port_pkey, store_port_pkey,
++              alloc_group_attrs(show_port_pkey,
++                                is_eth ? NULL : store_port_pkey,
+                                 dev->dev->caps.pkey_table_len[port_num]);
+       if (!p->pkey_group.attrs) {
+               ret = -ENOMEM;
diff --git a/queue-3.14/ib-mlx4-use-correct-sl-on-ah-query-under-roce.patch b/queue-3.14/ib-mlx4-use-correct-sl-on-ah-query-under-roce.patch
new file mode 100644 (file)
index 0000000..b13c162
--- /dev/null
@@ -0,0 +1,40 @@
+From 5e99b139f1b68acd65e36515ca347b03856dfb5a Mon Sep 17 00:00:00 2001
+From: Noa Osherovich <noaos@mellanox.com>
+Date: Thu, 30 Jul 2015 17:34:24 +0300
+Subject: IB/mlx4: Use correct SL on AH query under RoCE
+
+From: Noa Osherovich <noaos@mellanox.com>
+
+commit 5e99b139f1b68acd65e36515ca347b03856dfb5a upstream.
+
+The mlx4 IB driver implementation for ib_query_ah used a wrong offset
+(28 instead of 29) when link type is Ethernet. Fixed to use the correct one.
+
+Fixes: fa417f7b520e ('IB/mlx4: Add support for IBoE')
+Signed-off-by: Shani Michaeli <shanim@mellanox.com>
+Signed-off-by: Noa Osherovich <noaos@mellanox.com>
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/mlx4/ah.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx4/ah.c
++++ b/drivers/infiniband/hw/mlx4/ah.c
+@@ -147,9 +147,13 @@ int mlx4_ib_query_ah(struct ib_ah *ibah,
+       enum rdma_link_layer ll;
+       memset(ah_attr, 0, sizeof *ah_attr);
+-      ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
+       ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
+       ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
++      if (ll == IB_LINK_LAYER_ETHERNET)
++              ah_attr->sl = be32_to_cpu(ah->av.eth.sl_tclass_flowlabel) >> 29;
++      else
++              ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
++
+       ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
+       if (ah->av.ib.stat_rate)
+               ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
diff --git a/queue-3.14/ib-qib-change-lkey-table-allocation-to-support-more-mrs.patch b/queue-3.14/ib-qib-change-lkey-table-allocation-to-support-more-mrs.patch
new file mode 100644 (file)
index 0000000..1506053
--- /dev/null
@@ -0,0 +1,104 @@
+From d6f1c17e162b2a11e708f28fa93f2f79c164b442 Mon Sep 17 00:00:00 2001
+From: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Date: Tue, 21 Jul 2015 08:36:07 -0400
+Subject: IB/qib: Change lkey table allocation to support more MRs
+
+From: Mike Marciniszyn <mike.marciniszyn@intel.com>
+
+commit d6f1c17e162b2a11e708f28fa93f2f79c164b442 upstream.
+
+The lkey table is allocated with a __get_free_pages() call with an
+order based on a number of index bits from a module parameter.
+
+The underlying kernel code cannot allocate that many contiguous pages.
+
+There is no reason the underlying memory needs to be physically
+contiguous.
+
+This patch:
+- switches the allocation/deallocation to vmalloc/vfree
+- caps the number of bits to 23 to ensure at least 1 generation bit
+  o this matches the module parameter description
+
+Reviewed-by: Vinit Agnihotri <vinit.abhay.agnihotri@intel.com>
+Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/qib/qib_keys.c  |    4 ++++
+ drivers/infiniband/hw/qib/qib_verbs.c |   14 ++++++++++----
+ drivers/infiniband/hw/qib/qib_verbs.h |    2 ++
+ 3 files changed, 16 insertions(+), 4 deletions(-)
+
+--- a/drivers/infiniband/hw/qib/qib_keys.c
++++ b/drivers/infiniband/hw/qib/qib_keys.c
+@@ -86,6 +86,10 @@ int qib_alloc_lkey(struct qib_mregion *m
+        * unrestricted LKEY.
+        */
+       rkt->gen++;
++      /*
++       * bits are capped in qib_verbs.c to insure enough bits
++       * for generation number
++       */
+       mr->lkey = (r << (32 - ib_qib_lkey_table_size)) |
+               ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen)
+                << 8);
+--- a/drivers/infiniband/hw/qib/qib_verbs.c
++++ b/drivers/infiniband/hw/qib/qib_verbs.c
+@@ -40,6 +40,7 @@
+ #include <linux/rculist.h>
+ #include <linux/mm.h>
+ #include <linux/random.h>
++#include <linux/vmalloc.h>
+ #include "qib.h"
+ #include "qib_common.h"
+@@ -2086,10 +2087,16 @@ int qib_register_ib_device(struct qib_de
+        * the LKEY).  The remaining bits act as a generation number or tag.
+        */
+       spin_lock_init(&dev->lk_table.lock);
++      /* insure generation is at least 4 bits see keys.c */
++      if (ib_qib_lkey_table_size > MAX_LKEY_TABLE_BITS) {
++              qib_dev_warn(dd, "lkey bits %u too large, reduced to %u\n",
++                      ib_qib_lkey_table_size, MAX_LKEY_TABLE_BITS);
++              ib_qib_lkey_table_size = MAX_LKEY_TABLE_BITS;
++      }
+       dev->lk_table.max = 1 << ib_qib_lkey_table_size;
+       lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
+       dev->lk_table.table = (struct qib_mregion __rcu **)
+-              __get_free_pages(GFP_KERNEL, get_order(lk_tab_size));
++              vmalloc(lk_tab_size);
+       if (dev->lk_table.table == NULL) {
+               ret = -ENOMEM;
+               goto err_lk;
+@@ -2262,7 +2269,7 @@ err_tx:
+                                       sizeof(struct qib_pio_header),
+                                 dev->pio_hdrs, dev->pio_hdrs_phys);
+ err_hdrs:
+-      free_pages((unsigned long) dev->lk_table.table, get_order(lk_tab_size));
++      vfree(dev->lk_table.table);
+ err_lk:
+       kfree(dev->qp_table);
+ err_qpt:
+@@ -2316,8 +2323,7 @@ void qib_unregister_ib_device(struct qib
+                                       sizeof(struct qib_pio_header),
+                                 dev->pio_hdrs, dev->pio_hdrs_phys);
+       lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
+-      free_pages((unsigned long) dev->lk_table.table,
+-                 get_order(lk_tab_size));
++      vfree(dev->lk_table.table);
+       kfree(dev->qp_table);
+ }
+--- a/drivers/infiniband/hw/qib/qib_verbs.h
++++ b/drivers/infiniband/hw/qib/qib_verbs.h
+@@ -647,6 +647,8 @@ struct qib_qpn_table {
+       struct qpn_map map[QPNMAP_ENTRIES];
+ };
++#define MAX_LKEY_TABLE_BITS 23
++
+ struct qib_lkey_table {
+       spinlock_t lock; /* protect changes in this struct */
+       u32 next;               /* next unused index (speeds search) */
diff --git a/queue-3.14/ib-uverbs-fix-race-between-ib_uverbs_open-and-remove_one.patch b/queue-3.14/ib-uverbs-fix-race-between-ib_uverbs_open-and-remove_one.patch
new file mode 100644 (file)
index 0000000..baf692b
--- /dev/null
@@ -0,0 +1,201 @@
+From 35d4a0b63dc0c6d1177d4f532a9deae958f0662c Mon Sep 17 00:00:00 2001
+From: Yishai Hadas <yishaih@mellanox.com>
+Date: Thu, 13 Aug 2015 18:32:03 +0300
+Subject: IB/uverbs: Fix race between ib_uverbs_open and remove_one
+
+From: Yishai Hadas <yishaih@mellanox.com>
+
+commit 35d4a0b63dc0c6d1177d4f532a9deae958f0662c upstream.
+
+Fixes: 2a72f212263701b927559f6850446421d5906c41 ("IB/uverbs: Remove dev_table")
+
+Before this commit there was a device look-up table that was protected
+by a spin_lock used by ib_uverbs_open and by ib_uverbs_remove_one. When
+it was dropped and container_of was used instead, it enabled the race
+with remove_one as dev might be freed just after:
+dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev) but
+before the kref_get.
+
+In addition, this buggy patch added some dead code as
+container_of(x,y,z) can never be NULL and so dev can never be NULL.
+As a result the comment above ib_uverbs_open saying "the open method
+will either immediately run -ENXIO" is wrong as it can never happen.
+
+The solution follows Jason Gunthorpe's suggestion from the URL below:
+https://www.mail-archive.com/linux-rdma@vger.kernel.org/msg25692.html
+
+cdev will hold a kref on the parent (the containing structure,
+ib_uverbs_device) and only when that kref is released it is
+guaranteed that open will never be called again.
+
+In addition, fix the active count scheme to use an atomic
+rather than a kref to prevent a WARN_ON, as pointed out in the
+above comment from Jason.
+
+Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
+Signed-off-by: Shachar Raindel <raindel@mellanox.com>
+Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/core/uverbs.h      |    3 +-
+ drivers/infiniband/core/uverbs_main.c |   43 +++++++++++++++++++++++-----------
+ 2 files changed, 32 insertions(+), 14 deletions(-)
+
+--- a/drivers/infiniband/core/uverbs.h
++++ b/drivers/infiniband/core/uverbs.h
+@@ -85,7 +85,7 @@
+  */
+ struct ib_uverbs_device {
+-      struct kref                             ref;
++      atomic_t                                refcount;
+       int                                     num_comp_vectors;
+       struct completion                       comp;
+       struct device                          *dev;
+@@ -94,6 +94,7 @@ struct ib_uverbs_device {
+       struct cdev                             cdev;
+       struct rb_root                          xrcd_tree;
+       struct mutex                            xrcd_tree_mutex;
++      struct kobject                          kobj;
+ };
+ struct ib_uverbs_event_file {
+--- a/drivers/infiniband/core/uverbs_main.c
++++ b/drivers/infiniband/core/uverbs_main.c
+@@ -127,14 +127,18 @@ static int (*uverbs_ex_cmd_table[])(stru
+ static void ib_uverbs_add_one(struct ib_device *device);
+ static void ib_uverbs_remove_one(struct ib_device *device);
+-static void ib_uverbs_release_dev(struct kref *ref)
++static void ib_uverbs_release_dev(struct kobject *kobj)
+ {
+       struct ib_uverbs_device *dev =
+-              container_of(ref, struct ib_uverbs_device, ref);
++              container_of(kobj, struct ib_uverbs_device, kobj);
+-      complete(&dev->comp);
++      kfree(dev);
+ }
++static struct kobj_type ib_uverbs_dev_ktype = {
++      .release = ib_uverbs_release_dev,
++};
++
+ static void ib_uverbs_release_event_file(struct kref *ref)
+ {
+       struct ib_uverbs_event_file *file =
+@@ -298,13 +302,19 @@ static int ib_uverbs_cleanup_ucontext(st
+       return context->device->dealloc_ucontext(context);
+ }
++static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
++{
++      complete(&dev->comp);
++}
++
+ static void ib_uverbs_release_file(struct kref *ref)
+ {
+       struct ib_uverbs_file *file =
+               container_of(ref, struct ib_uverbs_file, ref);
+       module_put(file->device->ib_dev->owner);
+-      kref_put(&file->device->ref, ib_uverbs_release_dev);
++      if (atomic_dec_and_test(&file->device->refcount))
++              ib_uverbs_comp_dev(file->device);
+       kfree(file);
+ }
+@@ -734,9 +744,7 @@ static int ib_uverbs_open(struct inode *
+       int ret;
+       dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
+-      if (dev)
+-              kref_get(&dev->ref);
+-      else
++      if (!atomic_inc_not_zero(&dev->refcount))
+               return -ENXIO;
+       if (!try_module_get(dev->ib_dev->owner)) {
+@@ -757,6 +765,7 @@ static int ib_uverbs_open(struct inode *
+       mutex_init(&file->mutex);
+       filp->private_data = file;
++      kobject_get(&dev->kobj);
+       return nonseekable_open(inode, filp);
+@@ -764,13 +773,16 @@ err_module:
+       module_put(dev->ib_dev->owner);
+ err:
+-      kref_put(&dev->ref, ib_uverbs_release_dev);
++      if (atomic_dec_and_test(&dev->refcount))
++              ib_uverbs_comp_dev(dev);
++
+       return ret;
+ }
+ static int ib_uverbs_close(struct inode *inode, struct file *filp)
+ {
+       struct ib_uverbs_file *file = filp->private_data;
++      struct ib_uverbs_device *dev = file->device;
+       ib_uverbs_cleanup_ucontext(file, file->ucontext);
+@@ -778,6 +790,7 @@ static int ib_uverbs_close(struct inode
+               kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+       kref_put(&file->ref, ib_uverbs_release_file);
++      kobject_put(&dev->kobj);
+       return 0;
+ }
+@@ -873,10 +886,11 @@ static void ib_uverbs_add_one(struct ib_
+       if (!uverbs_dev)
+               return;
+-      kref_init(&uverbs_dev->ref);
++      atomic_set(&uverbs_dev->refcount, 1);
+       init_completion(&uverbs_dev->comp);
+       uverbs_dev->xrcd_tree = RB_ROOT;
+       mutex_init(&uverbs_dev->xrcd_tree_mutex);
++      kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
+       spin_lock(&map_lock);
+       devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
+@@ -903,6 +917,7 @@ static void ib_uverbs_add_one(struct ib_
+       cdev_init(&uverbs_dev->cdev, NULL);
+       uverbs_dev->cdev.owner = THIS_MODULE;
+       uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
++      uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
+       kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
+       if (cdev_add(&uverbs_dev->cdev, base, 1))
+               goto err_cdev;
+@@ -933,9 +948,10 @@ err_cdev:
+               clear_bit(devnum, overflow_map);
+ err:
+-      kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
++      if (atomic_dec_and_test(&uverbs_dev->refcount))
++              ib_uverbs_comp_dev(uverbs_dev);
+       wait_for_completion(&uverbs_dev->comp);
+-      kfree(uverbs_dev);
++      kobject_put(&uverbs_dev->kobj);
+       return;
+ }
+@@ -955,9 +971,10 @@ static void ib_uverbs_remove_one(struct
+       else
+               clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
+-      kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
++      if (atomic_dec_and_test(&uverbs_dev->refcount))
++              ib_uverbs_comp_dev(uverbs_dev);
+       wait_for_completion(&uverbs_dev->comp);
+-      kfree(uverbs_dev);
++      kobject_put(&uverbs_dev->kobj);
+ }
+ static char *uverbs_devnode(struct device *dev, umode_t *mode)
diff --git a/queue-3.14/ib-uverbs-reject-invalid-or-unknown-opcodes.patch b/queue-3.14/ib-uverbs-reject-invalid-or-unknown-opcodes.patch
new file mode 100644 (file)
index 0000000..5c68acc
--- /dev/null
@@ -0,0 +1,52 @@
+From b632ffa7cee439ba5dce3b3bc4a5cbe2b3e20133 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Wed, 26 Aug 2015 11:00:37 +0200
+Subject: IB/uverbs: reject invalid or unknown opcodes
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit b632ffa7cee439ba5dce3b3bc4a5cbe2b3e20133 upstream.
+
+We have many WR opcodes that are only supported in kernel space
+and/or require optional information to be copied into the WR
+structure.  Reject all those not explicitly handled so that we
+can't pass invalid information to drivers.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/core/uverbs_cmd.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/core/uverbs_cmd.c
++++ b/drivers/infiniband/core/uverbs_cmd.c
+@@ -2111,6 +2111,12 @@ ssize_t ib_uverbs_post_send(struct ib_uv
+               next->send_flags = user_wr->send_flags;
+               if (is_ud) {
++                      if (next->opcode != IB_WR_SEND &&
++                          next->opcode != IB_WR_SEND_WITH_IMM) {
++                              ret = -EINVAL;
++                              goto out_put;
++                      }
++
+                       next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
+                                                    file->ucontext);
+                       if (!next->wr.ud.ah) {
+@@ -2150,9 +2156,11 @@ ssize_t ib_uverbs_post_send(struct ib_uv
+                                       user_wr->wr.atomic.compare_add;
+                               next->wr.atomic.swap = user_wr->wr.atomic.swap;
+                               next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
++                      case IB_WR_SEND:
+                               break;
+                       default:
+-                              break;
++                              ret = -EINVAL;
++                              goto out_put;
+                       }
+               }
index 4c21b681aa613665e5596c16c068e2042cdd562d..c82511771a180480f419c7f1d07e889a5663578c 100644 (file)
@@ -32,3 +32,12 @@ fs-if-a-coredump-already-exists-unlink-and-recreate-with-o_excl.patch
 mmc-core-fix-race-condition-in-mmc_wait_data_done.patch
 md-raid10-always-set-reshape_safe-when-initializing-reshape_position.patch
 xen-gntdev-convert-priv-lock-to-a-mutex.patch
+hfs-fix-b-tree-corruption-after-insertion-at-position-0.patch
+ib-qib-change-lkey-table-allocation-to-support-more-mrs.patch
+ib-uverbs-reject-invalid-or-unknown-opcodes.patch
+ib-uverbs-fix-race-between-ib_uverbs_open-and-remove_one.patch
+ib-mlx4-forbid-using-sysfs-to-change-roce-pkeys.patch
+ib-mlx4-use-correct-sl-on-ah-query-under-roce.patch
+stmmac-fix-check-for-phydev-being-open.patch
+stmmac-troubleshoot-unexpected-bits-in-des0-des1.patch
+hfs-hfsplus-cache-pages-correctly-between-bnode_create-and-bnode_free.patch
diff --git a/queue-3.14/stmmac-fix-check-for-phydev-being-open.patch b/queue-3.14/stmmac-fix-check-for-phydev-being-open.patch
new file mode 100644 (file)
index 0000000..992c1ad
--- /dev/null
@@ -0,0 +1,43 @@
+From dfc50fcaad574e5c8c85cbc83eca1426b2413fa4 Mon Sep 17 00:00:00 2001
+From: Alexey Brodkin <Alexey.Brodkin@synopsys.com>
+Date: Wed, 9 Sep 2015 18:01:08 +0300
+Subject: stmmac: fix check for phydev being open
+
+From: Alexey Brodkin <Alexey.Brodkin@synopsys.com>
+
+commit dfc50fcaad574e5c8c85cbc83eca1426b2413fa4 upstream.
+
+The current check of phydev with IS_ERR(phydev) may not make much sense
+because of_phy_connect() returns NULL on failure instead of an error value.
+
+Still, for checking the result of phy_connect(), IS_ERR() makes perfect sense.
+
+So let's use the combined check IS_ERR_OR_NULL(), which covers both cases.
+
+Cc: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+Cc: linux-kernel@vger.kernel.org
+Cc: David Miller <davem@davemloft.net>
+Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -810,8 +810,11 @@ static int stmmac_init_phy(struct net_de
+       phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, interface);
+-      if (IS_ERR(phydev)) {
++      if (IS_ERR_OR_NULL(phydev)) {
+               pr_err("%s: Could not attach to PHY\n", dev->name);
++              if (!phydev)
++                      return -ENODEV;
++
+               return PTR_ERR(phydev);
+       }
diff --git a/queue-3.14/stmmac-troubleshoot-unexpected-bits-in-des0-des1.patch b/queue-3.14/stmmac-troubleshoot-unexpected-bits-in-des0-des1.patch
new file mode 100644 (file)
index 0000000..cdc823b
--- /dev/null
@@ -0,0 +1,169 @@
+From f1590670ce069eefeb93916391a67643e6ad1630 Mon Sep 17 00:00:00 2001
+From: Alexey Brodkin <Alexey.Brodkin@synopsys.com>
+Date: Wed, 24 Jun 2015 11:47:41 +0300
+Subject: stmmac: troubleshoot unexpected bits in des0 & des1
+
+From: Alexey Brodkin <Alexey.Brodkin@synopsys.com>
+
+commit f1590670ce069eefeb93916391a67643e6ad1630 upstream.
+
+Current implementation of descriptor init procedure only takes
+care about setting/clearing ownership flag in "des0"/"des1"
+fields while it is perfectly possible to get unexpected bits
+set because of the following factors:
+
+ [1] On driver probe underlying memory allocated with
+     dma_alloc_coherent() might not be zeroed and so
+     it will be filled with garbage.
+
+ [2] During driver operation some bits could be set by the GMAC DMA
+     controller (for example error flags etc).
+
+And unexpected and/or randomly set flags in "des0"/"des1"
+fields may lead to unpredictable behavior of GMAC DMA block.
+
+This change addresses both items above with:
+
+ [1] Use of dma_zalloc_coherent() instead of simple
+     dma_alloc_coherent() to make sure allocated memory is
+     zeroed. That shouldn't affect performance because
+     this allocation only happens once on driver probe.
+
+ [2] Do explicit zeroing of both "des0" and "des1" fields
+     of all buffer descriptors during initialization of
+     DMA transfer.
+
+And while at it, fix the indentation of the dma_free_coherent()
+counterpart as well.
+
+Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
+Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+Cc: arc-linux-dev@synopsys.com
+Cc: linux-kernel@vger.kernel.org
+Cc: stable@vger.kernel.org
+Cc: David Miller <davem@davemloft.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/stmicro/stmmac/descs.h       |    2 +
+ drivers/net/ethernet/stmicro/stmmac/enh_desc.c    |    3 +
+ drivers/net/ethernet/stmicro/stmmac/norm_desc.c   |    3 +
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   44 +++++++++++-----------
+ 4 files changed, 28 insertions(+), 24 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/descs.h
++++ b/drivers/net/ethernet/stmicro/stmmac/descs.h
+@@ -158,6 +158,8 @@ struct dma_desc {
+                       u32 buffer2_size:13;
+                       u32 reserved4:3;
+               } etx;          /* -- enhanced -- */
++
++              u64 all_flags;
+       } des01;
+       unsigned int des2;
+       unsigned int des3;
+--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
++++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+@@ -240,6 +240,7 @@ static int enh_desc_get_rx_status(void *
+ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
+                                 int mode, int end)
+ {
++      p->des01.all_flags = 0;
+       p->des01.erx.own = 1;
+       p->des01.erx.buffer1_size = BUF_SIZE_8KiB - 1;
+@@ -254,7 +255,7 @@ static void enh_desc_init_rx_desc(struct
+ static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end)
+ {
+-      p->des01.etx.own = 0;
++      p->des01.all_flags = 0;
+       if (mode == STMMAC_CHAIN_MODE)
+               ehn_desc_tx_set_on_chain(p, end);
+       else
+--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
++++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+@@ -123,6 +123,7 @@ static int ndesc_get_rx_status(void *dat
+ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode,
+                              int end)
+ {
++      p->des01.all_flags = 0;
+       p->des01.rx.own = 1;
+       p->des01.rx.buffer1_size = BUF_SIZE_2KiB - 1;
+@@ -137,7 +138,7 @@ static void ndesc_init_rx_desc(struct dm
+ static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end)
+ {
+-      p->des01.tx.own = 0;
++      p->des01.all_flags = 0;
+       if (mode == STMMAC_CHAIN_MODE)
+               ndesc_tx_set_on_chain(p, end);
+       else
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -1145,41 +1145,41 @@ static int alloc_dma_desc_resources(stru
+               goto err_tx_skbuff;
+       if (priv->extend_desc) {
+-              priv->dma_erx = dma_alloc_coherent(priv->device, rxsize *
+-                                                 sizeof(struct
+-                                                        dma_extended_desc),
+-                                                 &priv->dma_rx_phy,
+-                                                 GFP_KERNEL);
++              priv->dma_erx = dma_zalloc_coherent(priv->device, rxsize *
++                                                  sizeof(struct
++                                                         dma_extended_desc),
++                                                  &priv->dma_rx_phy,
++                                                  GFP_KERNEL);
+               if (!priv->dma_erx)
+                       goto err_dma;
+-              priv->dma_etx = dma_alloc_coherent(priv->device, txsize *
+-                                                 sizeof(struct
+-                                                        dma_extended_desc),
+-                                                 &priv->dma_tx_phy,
+-                                                 GFP_KERNEL);
++              priv->dma_etx = dma_zalloc_coherent(priv->device, txsize *
++                                                  sizeof(struct
++                                                         dma_extended_desc),
++                                                  &priv->dma_tx_phy,
++                                                  GFP_KERNEL);
+               if (!priv->dma_etx) {
+                       dma_free_coherent(priv->device, priv->dma_rx_size *
+-                                      sizeof(struct dma_extended_desc),
+-                                      priv->dma_erx, priv->dma_rx_phy);
++                                        sizeof(struct dma_extended_desc),
++                                        priv->dma_erx, priv->dma_rx_phy);
+                       goto err_dma;
+               }
+       } else {
+-              priv->dma_rx = dma_alloc_coherent(priv->device, rxsize *
+-                                                sizeof(struct dma_desc),
+-                                                &priv->dma_rx_phy,
+-                                                GFP_KERNEL);
++              priv->dma_rx = dma_zalloc_coherent(priv->device, rxsize *
++                                                 sizeof(struct dma_desc),
++                                                 &priv->dma_rx_phy,
++                                                 GFP_KERNEL);
+               if (!priv->dma_rx)
+                       goto err_dma;
+-              priv->dma_tx = dma_alloc_coherent(priv->device, txsize *
+-                                                sizeof(struct dma_desc),
+-                                                &priv->dma_tx_phy,
+-                                                GFP_KERNEL);
++              priv->dma_tx = dma_zalloc_coherent(priv->device, txsize *
++                                                 sizeof(struct dma_desc),
++                                                 &priv->dma_tx_phy,
++                                                 GFP_KERNEL);
+               if (!priv->dma_tx) {
+                       dma_free_coherent(priv->device, priv->dma_rx_size *
+-                                      sizeof(struct dma_desc),
+-                                      priv->dma_rx, priv->dma_rx_phy);
++                                        sizeof(struct dma_desc),
++                                        priv->dma_rx, priv->dma_rx_phy);
+                       goto err_dma;
+               }
+       }