3.10-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 6 Jan 2014 22:09:24 +0000 (14:09 -0800)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 6 Jan 2014 22:09:24 +0000 (14:09 -0800)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 6 Jan 2014 22:09:24 +0000 (14:09 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 6 Jan 2014 22:09:24 +0000 (14:09 -0800)
diff --git a/queue-3.10/ceph-add-check-returned-value-on-func-ceph_calc_ceph_pg.patch b/queue-3.10/ceph-add-check-returned-value-on-func-ceph_calc_ceph_pg.patch

new file mode 100644 (file)

index 0000000..eb927cc
--- /dev/null
+++ b/queue-3.10/ceph-add-check-returned-value-on-func-ceph_calc_ceph_pg.patch
@@ -0,0 +1,37 @@
+From 2fbcbff1d6b9243ef71c64a8ab993bc3c7bb7af1 Mon Sep 17 00:00:00 2001
+From: majianpeng <majianpeng@gmail.com>
+Date: Fri, 2 Aug 2013 18:14:48 +0800
+Subject: ceph: Add check returned value on func ceph_calc_ceph_pg.
+
+From: majianpeng <majianpeng@gmail.com>
+
+commit 2fbcbff1d6b9243ef71c64a8ab993bc3c7bb7af1 upstream.
+
+Function ceph_calc_ceph_pg() may fail, so check its return value.
+
+Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/ioctl.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/fs/ceph/ioctl.c
++++ b/fs/ceph/ioctl.c
+@@ -211,8 +211,12 @@ static long ceph_ioctl_get_dataloc(struc
+       snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
+                ceph_ino(inode), dl.object_no);
+ 
+-      ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
+-              ceph_file_layout_pg_pool(ci->i_layout));
++      r = ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
++                              ceph_file_layout_pg_pool(ci->i_layout));
++      if (r < 0) {
++              up_read(&osdc->map_sem);
++              return r;
++      }
+ 
+       dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
+       if (dl.osd >= 0) {
diff --git a/queue-3.10/ceph-allow-sync_read-write-return-partial-successed-size-of-read-write.patch b/queue-3.10/ceph-allow-sync_read-write-return-partial-successed-size-of-read-write.patch

new file mode 100644 (file)

index 0000000..8033ec4
--- /dev/null
+++ b/queue-3.10/ceph-allow-sync_read-write-return-partial-successed-size-of-read-write.patch
@@ -0,0 +1,41 @@
+From ee7289bfadda5f4ef60884547ebc9989c8fb314a Mon Sep 17 00:00:00 2001
+From: majianpeng <majianpeng@gmail.com>
+Date: Wed, 21 Aug 2013 15:02:51 +0800
+Subject: ceph: allow sync_read/write return partial successed size of read/write.
+
+From: majianpeng <majianpeng@gmail.com>
+
+commit ee7289bfadda5f4ef60884547ebc9989c8fb314a upstream.
+
+sync_read/write may perform multiple stripe operations. If one of them
+hits an error, return the size that already succeeded rather than an
+error value. The exception is a write that hits -EOLDSNAPC: in that
+case, retry the whole write.
+
+Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/file.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -373,7 +373,7 @@ more:
+                       goto more;
+       }
+ 
+-      if (ret >= 0) {
++      if (read > 0) {
+               ret = read;
+               /* did we bounce off eof? */
+               if (pos + left > inode->i_size)
+@@ -611,6 +611,8 @@ out:
+               if (check_caps)
+                       ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY,
+                                       NULL);
++      } else if (ret != -EOLDSNAPC && written > 0) {
++              ret = written;
+       }
+       return ret;
+ }
diff --git a/queue-3.10/ceph-avoid-accessing-invalid-memory.patch b/queue-3.10/ceph-avoid-accessing-invalid-memory.patch

new file mode 100644 (file)

index 0000000..19bbbe2
--- /dev/null
+++ b/queue-3.10/ceph-avoid-accessing-invalid-memory.patch
@@ -0,0 +1,74 @@
+From 5446429630257f4723829409337a26c076907d5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sasha.levin@oracle.com>
+Date: Mon, 1 Jul 2013 18:33:39 -0400
+Subject: ceph: avoid accessing invalid memory
+
+From: Sasha Levin <sasha.levin@oracle.com>
+
+commit 5446429630257f4723829409337a26c076907d5d upstream.
+
+when mounting ceph with a dev name that starts with a slash, ceph
+would attempt to access the character before that slash. Since we
+don't actually own that byte of memory, we would trigger an
+invalid access:
+
+[   43.499934] BUG: unable to handle kernel paging request at ffff880fa3a97fff
+[   43.500984] IP: [<ffffffff818f3884>] parse_mount_options+0x1a4/0x300
+[   43.501491] PGD 743b067 PUD 10283c4067 PMD 10282a6067 PTE 8000000fa3a97060
+[   43.502301] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
+[   43.503006] Dumping ftrace buffer:
+[   43.503596]    (ftrace buffer empty)
+[   43.504046] CPU: 0 PID: 10879 Comm: mount Tainted: G        W    3.10.0-sasha #1129
+[   43.504851] task: ffff880fa625b000 ti: ffff880fa3412000 task.ti: ffff880fa3412000
+[   43.505608] RIP: 0010:[<ffffffff818f3884>]  [<ffffffff818f3884>] parse_mount_options$
+[   43.506552] RSP: 0018:ffff880fa3413d08  EFLAGS: 00010286
+[   43.507133] RAX: ffff880fa3a98000 RBX: ffff880fa3a98000 RCX: 0000000000000000
+[   43.507893] RDX: ffff880fa3a98001 RSI: 000000000000002f RDI: ffff880fa3a98000
+[   43.508610] RBP: ffff880fa3413d58 R08: 0000000000001f99 R09: ffff880fa3fe64c0
+[   43.509426] R10: ffff880fa3413d98 R11: ffff880fa38710d8 R12: ffff880fa3413da0
+[   43.509792] R13: ffff880fa3a97fff R14: 0000000000000000 R15: ffff880fa3413d90
+[   43.509792] FS:  00007fa9c48757e0(0000) GS:ffff880fd2600000(0000) knlGS:000000000000$
+[   43.509792] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+[   43.509792] CR2: ffff880fa3a97fff CR3: 0000000fa3bb9000 CR4: 00000000000006b0
+[   43.509792] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[   43.509792] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
+[   43.509792] Stack:
+[   43.509792]  0000e5180000000e ffffffff85ca1900 ffff880fa38710d8 ffff880fa3413d98
+[   43.509792]  0000000000000120 0000000000000000 ffff880fa3a98000 0000000000000000
+[   43.509792]  ffffffff85cf32a0 0000000000000000 ffff880fa3413dc8 ffffffff818f3c72
+[   43.509792] Call Trace:
+[   43.509792]  [<ffffffff818f3c72>] ceph_mount+0xa2/0x390
+[   43.509792]  [<ffffffff81226314>] ? pcpu_alloc+0x334/0x3c0
+[   43.509792]  [<ffffffff81282f8d>] mount_fs+0x8d/0x1a0
+[   43.509792]  [<ffffffff812263d0>] ? __alloc_percpu+0x10/0x20
+[   43.509792]  [<ffffffff8129f799>] vfs_kern_mount+0x79/0x100
+[   43.509792]  [<ffffffff812a224d>] do_new_mount+0xcd/0x1c0
+[   43.509792]  [<ffffffff812a2e8d>] do_mount+0x15d/0x210
+[   43.509792]  [<ffffffff81220e55>] ? strndup_user+0x45/0x60
+[   43.509792]  [<ffffffff812a2fdd>] SyS_mount+0x9d/0xe0
+[   43.509792]  [<ffffffff83fd816c>] tracesys+0xdd/0xe2
+[   43.509792] Code: 4c 8b 5d c0 74 0a 48 8d 50 01 49 89 14 24 eb 17 31 c0 48 83 c9 ff $
+[   43.509792] RIP  [<ffffffff818f3884>] parse_mount_options+0x1a4/0x300
+[   43.509792]  RSP <ffff880fa3413d08>
+[   43.509792] CR2: ffff880fa3a97fff
+[   43.509792] ---[ end trace 22469cd81e93af51 ]---
+
+Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
+Reviewed-by: Sage Weil <sage@inktan.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/super.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ceph/super.c
++++ b/fs/ceph/super.c
+@@ -357,7 +357,7 @@ static int parse_mount_options(struct ce
+       }
+       err = -EINVAL;
+       dev_name_end--;         /* back up to ':' separator */
+-      if (*dev_name_end != ':') {
++      if (dev_name_end < dev_name || *dev_name_end != ':') {
+               pr_err("device name is missing path (no : separator in %s)\n",
+                               dev_name);
+               goto out;
diff --git a/queue-3.10/ceph-cleanup-types-in-striped_read.patch b/queue-3.10/ceph-cleanup-types-in-striped_read.patch

new file mode 100644 (file)

index 0000000..4702aff
--- /dev/null
+++ b/queue-3.10/ceph-cleanup-types-in-striped_read.patch
@@ -0,0 +1,53 @@
+From 688bac461ba3e9d221a879ab40b687f5d7b5b19c Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Tue, 23 Jul 2013 16:48:01 +0300
+Subject: ceph: cleanup types in striped_read()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit 688bac461ba3e9d221a879ab40b687f5d7b5b19c upstream.
+
+We pass in a u64 value for "len" and then immediately truncate away the
+upper 32 bits.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <alex.elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/file.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -313,9 +313,9 @@ static int striped_read(struct inode *in
+ {
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+       struct ceph_inode_info *ci = ceph_inode(inode);
+-      u64 pos, this_len;
++      u64 pos, this_len, left;
+       int io_align, page_align;
+-      int left, pages_left;
++      int pages_left;
+       int read;
+       struct page **page_pos;
+       int ret;
+@@ -346,7 +346,7 @@ more:
+               ret = 0;
+       hit_stripe = this_len < left;
+       was_short = ret >= 0 && ret < this_len;
+-      dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read,
++      dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
+            ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
+ 
+       if (ret > 0) {
+@@ -378,7 +378,7 @@ more:
+                       if (pos + left > inode->i_size)
+                               left = inode->i_size - pos;
+ 
+-                      dout("zero tail %d\n", left);
++                      dout("zero tail %llu\n", left);
+                       ceph_zero_page_vector_range(page_align + read, left,
+                                                   pages);
+                       read += left;
diff --git a/queue-3.10/ceph-fix-bugs-about-handling-short-read-for-sync-read-mode.patch b/queue-3.10/ceph-fix-bugs-about-handling-short-read-for-sync-read-mode.patch

new file mode 100644 (file)

index 0000000..12f489b
--- /dev/null
+++ b/queue-3.10/ceph-fix-bugs-about-handling-short-read-for-sync-read-mode.patch
@@ -0,0 +1,117 @@
+From 02ae66d8b229708fd94b764f6c17ead1c7741fcf Mon Sep 17 00:00:00 2001
+From: majianpeng <majianpeng@gmail.com>
+Date: Tue, 6 Aug 2013 16:20:38 +0800
+Subject: ceph: fix bugs about handling short-read for sync read mode.
+
+From: majianpeng <majianpeng@gmail.com>
+
+commit 02ae66d8b229708fd94b764f6c17ead1c7741fcf upstream.
+
+cephfs . show_layout
+>layout.data_pool:     0
+>layout.object_size:   4194304
+>layout.stripe_unit:   4194304
+>layout.stripe_count:  1
+
+TestA:
+>dd if=/dev/urandom of=test bs=1M count=2 oflag=direct
+>dd if=/dev/urandom of=test bs=1M count=2 seek=4  oflag=direct
+>dd if=test of=/dev/null bs=6M count=1 iflag=direct
+The messages from func striped_read are:
+ceph:           file.c:350  : striped_read 0~6291456 (read 0) got 2097152 HITSTRIPE SHORT
+ceph:           file.c:350  : striped_read 2097152~4194304 (read 2097152) got 0 HITSTRIPE SHORT
+ceph:           file.c:381  : zero tail 4194304
+ceph:           file.c:390  : striped_read returns 6291456
+The hole in the file spans 2M-4M, but the code actually zeroes the last 4M,
+including the final 2M region, which isn't a hole.
+Using this patch, the messages are:
+ceph:           file.c:350  : striped_read 0~6291456 (read 0) got 2097152 HITSTRIPE SHORT
+ceph:           file.c:358  :  zero gap 2097152 to 4194304
+ceph:           file.c:350  : striped_read 4194304~2097152 (read 4194304) got 2097152
+ceph:           file.c:384  : striped_read returns 6291456
+
+TestB:
+>echo majianpeng > test
+>dd if=test of=/dev/null bs=2M count=1 iflag=direct
+The messages are:
+ceph:           file.c:350  : striped_read 0~6291456 (read 0) got 11 HITSTRIPE SHORT
+ceph:           file.c:350  : striped_read 11~6291445 (read 11) got 0 HITSTRIPE SHORT
+ceph:           file.c:390  : striped_read returns 11
+In this case it issued one extra striped_read, which is pointless.
+Using this patch, the messages are:
+ceph:           file.c:350  : striped_read 0~6291456 (read 0) got 11 HITSTRIPE SHORT
+ceph:           file.c:384  : striped_read returns 11
+
+Big thanks to Yan Zheng for the patch.
+
+Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>
+Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/file.c |   39 ++++++++++++++++-----------------------
+ 1 file changed, 16 insertions(+), 23 deletions(-)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -349,44 +349,37 @@ more:
+       dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
+            ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
+ 
+-      if (ret > 0) {
+-              int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
+-
+-              if (read < pos - off) {
+-                      dout(" zero gap %llu to %llu\n", off + read, pos);
+-                      ceph_zero_page_vector_range(page_align + read,
+-                                                  pos - off - read, pages);
++      if (ret >= 0) {
++              int didpages;
++              if (was_short && (pos + ret < inode->i_size)) {
++                      u64 tmp = min(this_len - ret,
++                                      inode->i_size - pos - ret);
++                      dout(" zero gap %llu to %llu\n",
++                              pos + ret, pos + ret + tmp);
++                      ceph_zero_page_vector_range(page_align + read + ret,
++                                                      tmp, pages);
++                      ret += tmp;
+               }
++
++              didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
+               pos += ret;
+               read = pos - off;
+               left -= ret;
+               page_pos += didpages;
+               pages_left -= didpages;
+ 
+-              /* hit stripe? */
+-              if (left && hit_stripe)
++              /* hit stripe and need continue*/
++              if (left && hit_stripe && pos < inode->i_size)
+                       goto more;
+       }
+ 
+-      if (was_short) {
++      if (ret >= 0) {
++              ret = read;
+               /* did we bounce off eof? */
+               if (pos + left > inode->i_size)
+                       *checkeof = 1;
+-
+-              /* zero trailing bytes (inside i_size) */
+-              if (left > 0 && pos < inode->i_size) {
+-                      if (pos + left > inode->i_size)
+-                              left = inode->i_size - pos;
+-
+-                      dout("zero tail %llu\n", left);
+-                      ceph_zero_page_vector_range(page_align + read, left,
+-                                                  pages);
+-                      read += left;
+-              }
+       }
+ 
+-      if (ret >= 0)
+-              ret = read;
+       dout("striped_read returns %d\n", ret);
+       return ret;
+ }
diff --git a/queue-3.10/ceph-fix-null-pointer-dereference.patch b/queue-3.10/ceph-fix-null-pointer-dereference.patch

new file mode 100644 (file)

index 0000000..b5f4a24
--- /dev/null
+++ b/queue-3.10/ceph-fix-null-pointer-dereference.patch
@@ -0,0 +1,34 @@
+From c338c07c51e3106711fad5eb599e375eadb6855d Mon Sep 17 00:00:00 2001
+From: Nathaniel Yazdani <n1ght.4nd.d4y@gmail.com>
+Date: Sun, 4 Aug 2013 21:04:30 -0700
+Subject: ceph: fix null pointer dereference
+
+From: Nathaniel Yazdani <n1ght.4nd.d4y@gmail.com>
+
+commit c338c07c51e3106711fad5eb599e375eadb6855d upstream.
+
+When register_session() is given an out-of-range argument for mds,
+ceph_mdsmap_get_addr() will return a null pointer, which would be given to
+ceph_con_open() & be dereferenced, causing a kernel oops. This fixes bug #4685
+in the Ceph bug tracker <http://tracker.ceph.com/issues/4685>.
+
+Signed-off-by: Nathaniel Yazdani <n1ght.4nd.d4y@gmail.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/mds_client.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -414,6 +414,9 @@ static struct ceph_mds_session *register
+ {
+       struct ceph_mds_session *s;
+ 
++      if (mds >= mdsc->mdsmap->m_max_mds)
++              return ERR_PTR(-EINVAL);
++
+       s = kzalloc(sizeof(*s), GFP_NOFS);
+       if (!s)
+               return ERR_PTR(-ENOMEM);
diff --git a/queue-3.10/ceph-free-mdsc-if-alloc-mdsc-mdsmap-failed.patch b/queue-3.10/ceph-free-mdsc-if-alloc-mdsc-mdsmap-failed.patch

new file mode 100644 (file)

index 0000000..e4a1a87
--- /dev/null
+++ b/queue-3.10/ceph-free-mdsc-if-alloc-mdsc-mdsmap-failed.patch
@@ -0,0 +1,31 @@
+From fb3101b6f0db9ae3f35dc8e6ec908d0af8cdf12e Mon Sep 17 00:00:00 2001
+From: majianpeng <majianpeng@gmail.com>
+Date: Tue, 25 Jun 2013 14:48:19 +0800
+Subject: ceph: Free mdsc if alloc mdsc->mdsmap failed.
+
+From: majianpeng <majianpeng@gmail.com>
+
+commit fb3101b6f0db9ae3f35dc8e6ec908d0af8cdf12e upstream.
+
+Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/mds_client.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -3044,8 +3044,10 @@ int ceph_mdsc_init(struct ceph_fs_client
+       fsc->mdsc = mdsc;
+       mutex_init(&mdsc->mutex);
+       mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
+-      if (mdsc->mdsmap == NULL)
++      if (mdsc->mdsmap == NULL) {
++              kfree(mdsc);
+               return -ENOMEM;
++      }
+ 
+       init_completion(&mdsc->safe_umount_waiters);
+       init_waitqueue_head(&mdsc->session_close_wq);
diff --git a/queue-3.10/ceph-improve-error-handling-in-ceph_mdsmap_decode.patch b/queue-3.10/ceph-improve-error-handling-in-ceph_mdsmap_decode.patch

new file mode 100644 (file)

index 0000000..ad2563f
--- /dev/null
+++ b/queue-3.10/ceph-improve-error-handling-in-ceph_mdsmap_decode.patch
@@ -0,0 +1,43 @@
+From c213b50b7dcbf06abcfbf1e4eee5b76586718bd9 Mon Sep 17 00:00:00 2001
+From: Emil Goode <emilgoode@gmail.com>
+Date: Tue, 28 May 2013 16:59:00 +0200
+Subject: ceph: improve error handling in ceph_mdsmap_decode
+
+From: Emil Goode <emilgoode@gmail.com>
+
+commit c213b50b7dcbf06abcfbf1e4eee5b76586718bd9 upstream.
+
+This patch makes the following improvements to the error handling
+in the ceph_mdsmap_decode function:
+
+- Add a NULL check for return value from kcalloc
+- Make use of the variable err
+
+Signed-off-by: Emil Goode <emilgoode@gmail.com>
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/mdsmap.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ceph/mdsmap.c
++++ b/fs/ceph/mdsmap.c
+@@ -138,6 +138,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(v
+                               m->m_info[mds].export_targets =
+                                       kcalloc(num_export_targets, sizeof(u32),
+                                               GFP_NOFS);
++                              if (m->m_info[mds].export_targets == NULL)
++                                      goto badmem;
+                               for (j = 0; j < num_export_targets; j++)
+                                       m->m_info[mds].export_targets[j] =
+                                              ceph_decode_32(&pexport_targets);
+@@ -170,7 +172,7 @@ bad:
+                      DUMP_PREFIX_OFFSET, 16, 1,
+                      start, end - start, true);
+       ceph_mdsmap_destroy(m);
+-      return ERR_PTR(-EINVAL);
++      return ERR_PTR(err);
+ }
+ 
+ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
diff --git a/queue-3.10/libceph-add-function-to-ensure-notifies-are-complete.patch b/queue-3.10/libceph-add-function-to-ensure-notifies-are-complete.patch

new file mode 100644 (file)

index 0000000..64f56dc
--- /dev/null
+++ b/queue-3.10/libceph-add-function-to-ensure-notifies-are-complete.patch
@@ -0,0 +1,60 @@
+From dd935f44a40f8fb02aff2cc0df2269c92422df1c Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Wed, 28 Aug 2013 21:43:09 -0700
+Subject: libceph: add function to ensure notifies are complete
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit dd935f44a40f8fb02aff2cc0df2269c92422df1c upstream.
+
+Without a way to flush the osd client's notify workqueue, a watch
+event that is unregistered could continue receiving callbacks
+indefinitely.
+
+Unregistering the event simply means no new notifies are added to the
+queue, but there may still be events in the queue that will call the
+watch callback for the event. If the queue is flushed after the event
+is unregistered, the caller can be sure no more watch callbacks will
+occur for the canceled watch.
+
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/ceph/osd_client.h |    2 ++
+ net/ceph/osd_client.c           |   11 +++++++++++
+ 2 files changed, 13 insertions(+)
+
+--- a/include/linux/ceph/osd_client.h
++++ b/include/linux/ceph/osd_client.h
+@@ -335,6 +335,8 @@ extern int ceph_osdc_wait_request(struct
+                                 struct ceph_osd_request *req);
+ extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
+ 
++extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
++
+ extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
+                              struct ceph_vino vino,
+                              struct ceph_file_layout *layout,
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -2209,6 +2209,17 @@ void ceph_osdc_sync(struct ceph_osd_clie
+ EXPORT_SYMBOL(ceph_osdc_sync);
+ 
+ /*
++ * Call all pending notify callbacks - for use after a watch is
++ * unregistered, to make sure no more callbacks for it will be invoked
++ */
++extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
++{
++      flush_workqueue(osdc->notify_wq);
++}
++EXPORT_SYMBOL(ceph_osdc_flush_notifies);
++
++
++/*
+  * init, shutdown
+  */
+ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
diff --git a/queue-3.10/libceph-add-lingering-request-reference-when-registered.patch b/queue-3.10/libceph-add-lingering-request-reference-when-registered.patch

new file mode 100644 (file)

index 0000000..5e1ef12
--- /dev/null
+++ b/queue-3.10/libceph-add-lingering-request-reference-when-registered.patch
@@ -0,0 +1,99 @@
+From 96e4dac66f69d28af2b736e723364efbbdf9fdee Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 22 May 2013 20:54:25 -0500
+Subject: libceph: add lingering request reference when registered
+
+From: Alex Elder <elder@inktank.com>
+
+commit 96e4dac66f69d28af2b736e723364efbbdf9fdee upstream.
+
+When an osd request is set to linger, the osd client holds onto the
+request so it can be re-submitted following certain osd map changes.
+The osd client holds a reference to the request until it is
+unregistered.  This is used by rbd for watch requests.
+
+Currently, the reference is taken when the request is marked with
+the linger flag.  This means that if an error occurs after that
+time but before the request completes successfully, that
+reference is leaked.
+
+There's really no reason to take the reference until the request is
+registered in the osd client's list of lingering requests, and
+that only happens when the lingering (watch) request completes
+successfully.
+
+So take that reference only when it gets registered following
+successful completion, and drop it (as before) when the request
+gets unregistered.  This avoids the reference problem on error
+in rbd.
+
+Rearrange ceph_osdc_unregister_linger_request() to avoid using
+the request pointer after it may have been freed.
+
+And hold an extra reference in kick_requests() while handling
+a linger request that has not yet been registered, to ensure
+it doesn't go away.
+
+This resolves:
+    http://tracker.ceph.com/issues/3859
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c |   12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1174,6 +1174,7 @@ static void __register_linger_request(st
+                                   struct ceph_osd_request *req)
+ {
+       dout("__register_linger_request %p\n", req);
++      ceph_osdc_get_request(req);
+       list_add_tail(&req->r_linger_item, &osdc->req_linger);
+       if (req->r_osd)
+               list_add_tail(&req->r_linger_osd,
+@@ -1196,6 +1197,7 @@ static void __unregister_linger_request(
+               if (list_empty(&req->r_osd_item))
+                       req->r_osd = NULL;
+       }
++      ceph_osdc_put_request(req);
+ }
+ 
+ void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
+@@ -1203,9 +1205,8 @@ void ceph_osdc_unregister_linger_request
+ {
+       mutex_lock(&osdc->request_mutex);
+       if (req->r_linger) {
+-              __unregister_linger_request(osdc, req);
+               req->r_linger = 0;
+-              ceph_osdc_put_request(req);
++              __unregister_linger_request(osdc, req);
+       }
+       mutex_unlock(&osdc->request_mutex);
+ }
+@@ -1217,11 +1218,6 @@ void ceph_osdc_set_request_linger(struct
+       if (!req->r_linger) {
+               dout("set_request_linger %p\n", req);
+               req->r_linger = 1;
+-              /*
+-               * caller is now responsible for calling
+-               * unregister_linger_request
+-               */
+-              ceph_osdc_get_request(req);
+       }
+ }
+ EXPORT_SYMBOL(ceph_osdc_set_request_linger);
+@@ -1633,8 +1629,10 @@ static void kick_requests(struct ceph_os
+                       dout("%p tid %llu restart on osd%d\n",
+                            req, req->r_tid,
+                            req->r_osd ? req->r_osd->o_osd : -1);
++                      ceph_osdc_get_request(req);
+                       __unregister_request(osdc, req);
+                       __register_linger_request(osdc, req);
++                      ceph_osdc_put_request(req);
+                       continue;
+               }
+ 
diff --git a/queue-3.10/libceph-call-r_unsafe_callback-when-unsafe-reply-is-received.patch b/queue-3.10/libceph-call-r_unsafe_callback-when-unsafe-reply-is-received.patch

new file mode 100644 (file)

index 0000000..41c4132
--- /dev/null
+++ b/queue-3.10/libceph-call-r_unsafe_callback-when-unsafe-reply-is-received.patch
@@ -0,0 +1,74 @@
+From 61c5d6bf7074ee32d014dcdf7698dc8c59eb712d Mon Sep 17 00:00:00 2001
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+Date: Mon, 24 Jun 2013 14:41:27 +0800
+Subject: libceph: call r_unsafe_callback when unsafe reply is received
+
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+
+commit 61c5d6bf7074ee32d014dcdf7698dc8c59eb712d upstream.
+
+We can't use !req->r_sent to check if OSD request is sent for the
+first time, this is because __cancel_request() zeros req->r_sent
+when OSD map changes. Rather than adding a new variable to struct
+ceph_osd_request to indicate if it's sent for the first time, We
+can call the unsafe callback only when unsafe OSD reply is received.
+If OSD's first reply is safe, just skip calling the unsafe callback.
+
+The purpose of unsafe callback is adding unsafe request to a list,
+so that fsync(2) can wait for the safe reply. fsync(2) doesn't need
+to wait for a write(2) that hasn't returned yet. So it's OK to add
+request to the unsafe list when the first OSD reply is received.
+(ceph_sync_write() returns after receiving the first OSD reply)
+
+Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1337,10 +1337,6 @@ static void __send_request(struct ceph_o
+ 
+       ceph_msg_get(req->r_request); /* send consumes a ref */
+ 
+-      /* Mark the request unsafe if this is the first timet's being sent. */
+-
+-      if (!req->r_sent && req->r_unsafe_callback)
+-              req->r_unsafe_callback(req, true);
+       req->r_sent = req->r_osd->o_incarnation;
+ 
+       ceph_con_send(&req->r_osd->o_con, req->r_request);
+@@ -1431,8 +1427,6 @@ static void handle_osds_timeout(struct w
+ 
+ static void complete_request(struct ceph_osd_request *req)
+ {
+-      if (req->r_unsafe_callback)
+-              req->r_unsafe_callback(req, false);
+       complete_all(&req->r_safe_completion);  /* fsync waiter */
+ }
+ 
+@@ -1559,14 +1553,20 @@ static void handle_reply(struct ceph_osd
+       mutex_unlock(&osdc->request_mutex);
+ 
+       if (!already_completed) {
++              if (req->r_unsafe_callback &&
++                  result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK))
++                      req->r_unsafe_callback(req, true);
+               if (req->r_callback)
+                       req->r_callback(req, msg);
+               else
+                       complete_all(&req->r_completion);
+       }
+ 
+-      if (flags & CEPH_OSD_FLAG_ONDISK)
++      if (flags & CEPH_OSD_FLAG_ONDISK) {
++              if (req->r_unsafe_callback && already_completed)
++                      req->r_unsafe_callback(req, false);
+               complete_request(req);
++      }
+ 
+ done:
+       dout("req=%p req->r_linger=%d\n", req, req->r_linger);
diff --git a/queue-3.10/libceph-create_singlethread_workqueue-doesn-t-return-err_ptrs.patch b/queue-3.10/libceph-create_singlethread_workqueue-doesn-t-return-err_ptrs.patch

new file mode 100644 (file)

index 0000000..cb317e0
--- /dev/null
+++ b/queue-3.10/libceph-create_singlethread_workqueue-doesn-t-return-err_ptrs.patch
@@ -0,0 +1,40 @@
+From dbcae088fa660086bde6e10d63bb3c9264832d85 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Thu, 15 Aug 2013 08:58:59 +0300
+Subject: libceph: create_singlethread_workqueue() doesn't return ERR_PTRs
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit dbcae088fa660086bde6e10d63bb3c9264832d85 upstream.
+
+create_singlethread_workqueue() returns NULL on error, and it doesn't
+return ERR_PTRs.
+
+I tweaked the error handling a little to be consistent with earlier in
+the function.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -2257,12 +2257,10 @@ int ceph_osdc_init(struct ceph_osd_clien
+       if (err < 0)
+               goto out_msgpool;
+ 
++      err = -ENOMEM;
+       osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
+-      if (IS_ERR(osdc->notify_wq)) {
+-              err = PTR_ERR(osdc->notify_wq);
+-              osdc->notify_wq = NULL;
++      if (!osdc->notify_wq)
+               goto out_msgpool;
+-      }
+       return 0;
+ 
+ out_msgpool:
diff --git a/queue-3.10/libceph-fix-error-handling-in-handle_reply.patch b/queue-3.10/libceph-fix-error-handling-in-handle_reply.patch

new file mode 100644 (file)

index 0000000..3d2fd08
--- /dev/null
+++ b/queue-3.10/libceph-fix-error-handling-in-handle_reply.patch
@@ -0,0 +1,50 @@
+From 1874119664dafda3ef2ed9b51b4759a9540d4a1a Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Thu, 15 Aug 2013 08:51:58 +0300
+Subject: libceph: fix error handling in handle_reply()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit 1874119664dafda3ef2ed9b51b4759a9540d4a1a upstream.
+
+We've tried to fix the error paths in this function before, but there
+is still a hidden goto in the ceph_decode_need() macro which goes to the
+wrong place.  We need to release the "req" and unlock a mutex before
+returning.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1488,14 +1488,14 @@ static void handle_reply(struct ceph_osd
+       dout("handle_reply %p tid %llu req %p result %d\n", msg, tid,
+            req, result);
+ 
+-      ceph_decode_need(&p, end, 4, bad);
++      ceph_decode_need(&p, end, 4, bad_put);
+       numops = ceph_decode_32(&p);
+       if (numops > CEPH_OSD_MAX_OP)
+               goto bad_put;
+       if (numops != req->r_num_ops)
+               goto bad_put;
+       payload_len = 0;
+-      ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad);
++      ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad_put);
+       for (i = 0; i < numops; i++) {
+               struct ceph_osd_op *op = p;
+               int len;
+@@ -1513,7 +1513,7 @@ static void handle_reply(struct ceph_osd
+               goto bad_put;
+       }
+ 
+-      ceph_decode_need(&p, end, 4 + numops * 4, bad);
++      ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
+       retry_attempt = ceph_decode_32(&p);
+       for (i = 0; i < numops; i++)
+               req->r_reply_op_result[i] = ceph_decode_32(&p);
diff --git a/queue-3.10/libceph-fix-safe-completion.patch b/queue-3.10/libceph-fix-safe-completion.patch

new file mode 100644 (file)

index 0000000..03eed66
--- /dev/null
+++ b/queue-3.10/libceph-fix-safe-completion.patch
@@ -0,0 +1,73 @@
+From eb845ff13a44477f8a411baedbf11d678b9daf0a Mon Sep 17 00:00:00 2001
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+Date: Fri, 31 May 2013 15:54:44 +0800
+Subject: libceph: fix safe completion
+
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+
+commit eb845ff13a44477f8a411baedbf11d678b9daf0a upstream.
+
+handle_reply() calls complete_request() only if the first OSD reply
+has ONDISK flag.
+
+Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/ceph/osd_client.h |    1 -
+ net/ceph/osd_client.c           |   17 ++++++++---------
+ 2 files changed, 8 insertions(+), 10 deletions(-)
+
+--- a/include/linux/ceph/osd_client.h
++++ b/include/linux/ceph/osd_client.h
+@@ -145,7 +145,6 @@ struct ceph_osd_request {
+       s32               r_reply_op_result[CEPH_OSD_MAX_OP];
+       int               r_got_reply;
+       int               r_linger;
+-      int               r_completed;
+ 
+       struct ceph_osd_client *r_osdc;
+       struct kref       r_kref;
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1522,6 +1522,8 @@ static void handle_reply(struct ceph_osd
+       for (i = 0; i < numops; i++)
+               req->r_reply_op_result[i] = ceph_decode_32(&p);
+ 
++      already_completed = req->r_got_reply;
++
+       if (!req->r_got_reply) {
+ 
+               req->r_result = result;
+@@ -1552,16 +1554,14 @@ static void handle_reply(struct ceph_osd
+           ((flags & CEPH_OSD_FLAG_WRITE) == 0))
+               __unregister_request(osdc, req);
+ 
+-      already_completed = req->r_completed;
+-      req->r_completed = 1;
+       mutex_unlock(&osdc->request_mutex);
+-      if (already_completed)
+-              goto done;
+ 
+-      if (req->r_callback)
+-              req->r_callback(req, msg);
+-      else
+-              complete_all(&req->r_completion);
++      if (!already_completed) {
++              if (req->r_callback)
++                      req->r_callback(req, msg);
++              else
++                      complete_all(&req->r_completion);
++      }
+ 
+       if (flags & CEPH_OSD_FLAG_ONDISK)
+               complete_request(req);
+@@ -2121,7 +2121,6 @@ int ceph_osdc_start_request(struct ceph_
+       __register_request(osdc, req);
+       req->r_sent = 0;
+       req->r_got_reply = 0;
+-      req->r_completed = 0;
+       rc = __map_request(osdc, req, 0);
+       if (rc < 0) {
+               if (nofail) {
diff --git a/queue-3.10/libceph-fix-truncate-size-calculation.patch b/queue-3.10/libceph-fix-truncate-size-calculation.patch

new file mode 100644 (file)

index 0000000..2ae8d9e
--- /dev/null
+++ b/queue-3.10/libceph-fix-truncate-size-calculation.patch
@@ -0,0 +1,42 @@
+From ccca4e37b1a912da3db68aee826557ea66145273 Mon Sep 17 00:00:00 2001
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+Date: Sun, 2 Jun 2013 18:40:23 +0800
+Subject: libceph: fix truncate size calculation
+
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+
+commit ccca4e37b1a912da3db68aee826557ea66145273 upstream.
+
+check the "not truncated yet" case
+
+Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -733,12 +733,14 @@ struct ceph_osd_request *ceph_osdc_new_r
+ 
+       object_size = le32_to_cpu(layout->fl_object_size);
+       object_base = off - objoff;
+-      if (truncate_size <= object_base) {
+-              truncate_size = 0;
+-      } else {
+-              truncate_size -= object_base;
+-              if (truncate_size > object_size)
+-                      truncate_size = object_size;
++      if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
++              if (truncate_size <= object_base) {
++                      truncate_size = 0;
++              } else {
++                      truncate_size -= object_base;
++                      if (truncate_size > object_size)
++                              truncate_size = object_size;
++              }
+       }
+ 
+       osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
diff --git a/queue-3.10/libceph-potential-null-dereference-in-ceph_osdc_handle_map.patch b/queue-3.10/libceph-potential-null-dereference-in-ceph_osdc_handle_map.patch

new file mode 100644 (file)

index 0000000..1f1761e
--- /dev/null
+++ b/queue-3.10/libceph-potential-null-dereference-in-ceph_osdc_handle_map.patch
@@ -0,0 +1,31 @@
+From b72e19b9225d4297a18715b0998093d843d170fa Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Thu, 15 Aug 2013 08:52:48 +0300
+Subject: libceph: potential NULL dereference in ceph_osdc_handle_map()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit b72e19b9225d4297a18715b0998093d843d170fa upstream.
+
+There are two places where we read "nr_maps"; if both of them are set to
+zero, then we would hit a NULL dereference here.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1786,6 +1786,8 @@ void ceph_osdc_handle_map(struct ceph_os
+               nr_maps--;
+       }
+ 
++      if (!osdc->osdmap)
++              goto bad;
+ done:
+       downgrade_write(&osdc->map_sem);
+       ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
diff --git a/queue-3.10/rbd-complete-notifies-before-cleaning-up-osd_client-and-rbd_dev.patch b/queue-3.10/rbd-complete-notifies-before-cleaning-up-osd_client-and-rbd_dev.patch

new file mode 100644 (file)

index 0000000..38f6451
--- /dev/null
+++ b/queue-3.10/rbd-complete-notifies-before-cleaning-up-osd_client-and-rbd_dev.patch
@@ -0,0 +1,38 @@
+From 9abc59908e0c5f983aaa91150da32d5b62cf60b7 Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Thu, 29 Aug 2013 17:31:03 -0700
+Subject: rbd: complete notifies before cleaning up osd_client and rbd_dev
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit 9abc59908e0c5f983aaa91150da32d5b62cf60b7 upstream.
+
+To ensure rbd_dev is not used after it's released, flush all pending
+notify callbacks before calling rbd_dev_image_release(). No new
+notifies can be added to the queue at this point because the watch has
+already been unregistered with the osd_client.
+
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -5150,6 +5150,13 @@ static ssize_t rbd_remove(struct bus_typ
+       ret = rbd_dev_header_watch_sync(rbd_dev, false);
+       if (ret)
+               rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret);
++
++      /*
++       * flush remaining watch callbacks - these must be complete
++       * before the osd_client is shutdown
++       */
++      dout("%s: flushing notifies", __func__);
++      ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
+       rbd_dev_image_release(rbd_dev);
+       module_put(THIS_MODULE);
+       ret = count;
diff --git a/queue-3.10/rbd-fix-a-couple-warnings.patch b/queue-3.10/rbd-fix-a-couple-warnings.patch

new file mode 100644 (file)

index 0000000..cdcc288
--- /dev/null
+++ b/queue-3.10/rbd-fix-a-couple-warnings.patch
@@ -0,0 +1,39 @@
+From e976cad0f0dbe5440a4ca38e29e1f932d9319125 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Sun, 9 Jun 2013 08:40:39 -0700
+Subject: rbd: fix a couple warnings
+
+From: Sage Weil <sage@inktank.com>
+
+commit e976cad0f0dbe5440a4ca38e29e1f932d9319125 upstream.
+
+gcc isn't quite smart enough and generates these warnings:
+
+drivers/block/rbd.c: In function 'rbd_img_request_fill':
+drivers/block/rbd.c:1266:22: warning: 'bio_list' may be used uninitialized in this function [-Wmaybe-uninitialized]
+drivers/block/rbd.c:2186:14: note: 'bio_list' was declared here
+drivers/block/rbd.c:2247:10: warning: 'pages' may be used uninitialized in this function [-Wmaybe-uninitialized]
+
+even though they are initialized for their respective code paths.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -2173,9 +2173,9 @@ static int rbd_img_request_fill(struct r
+       struct rbd_obj_request *obj_request = NULL;
+       struct rbd_obj_request *next_obj_request;
+       bool write_request = img_request_write_test(img_request);
+-      struct bio *bio_list;
++      struct bio *bio_list = 0;
+       unsigned int bio_offset = 0;
+-      struct page **pages;
++      struct page **pages = 0;
+       u64 img_offset;
+       u64 resid;
+       u16 opcode;
diff --git a/queue-3.10/rbd-fix-buffer-size-for-writes-to-images-with-snapshots.patch b/queue-3.10/rbd-fix-buffer-size-for-writes-to-images-with-snapshots.patch

new file mode 100644 (file)

index 0000000..0256f25
--- /dev/null
+++ b/queue-3.10/rbd-fix-buffer-size-for-writes-to-images-with-snapshots.patch
@@ -0,0 +1,56 @@
+From 03507db631c94a48e316c7f638ffb2991544d617 Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Tue, 27 Aug 2013 14:45:46 -0700
+Subject: rbd: fix buffer size for writes to images with snapshots
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit 03507db631c94a48e316c7f638ffb2991544d617 upstream.
+
+rbd_osd_req_create() needs to know the snapshot context size to create
+a buffer large enough to send it with the message front. It gets this
+from the img_request, which was not set for the obj_request yet. This
+resulted in trying to write past the end of the front payload, hitting
+this BUG:
+
+libceph: BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
+
+Fix this by associating the obj_request with its img_request
+immediately after it's created, before the osd request is created.
+
+Fixes: http://tracker.ceph.com/issues/5760
+Suggested-by: Alex Elder <alex.elder@linaro.org>
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Alex Elder <alex.elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -2213,6 +2213,11 @@ static int rbd_img_request_fill(struct r
+               rbd_segment_name_free(object_name);
+               if (!obj_request)
+                       goto out_unwind;
++              /*
++               * set obj_request->img_request before creating the
++               * osd_request so that it gets the right snapc
++               */
++              rbd_img_obj_request_add(img_request, obj_request);
+ 
+               if (type == OBJ_REQUEST_BIO) {
+                       unsigned int clone_size;
+@@ -2254,11 +2259,6 @@ static int rbd_img_request_fill(struct r
+                                       obj_request->pages, length,
+                                       offset & ~PAGE_MASK, false, false);
+ 
+-              /*
+-               * set obj_request->img_request before formatting
+-               * the osd_request so that it gets the right snapc
+-               */
+-              rbd_img_obj_request_add(img_request, obj_request);
+               if (write_request)
+                       rbd_osd_req_format_write(obj_request);
+               else
diff --git a/queue-3.10/rbd-fix-error-handling-from-rbd_snap_name.patch b/queue-3.10/rbd-fix-error-handling-from-rbd_snap_name.patch

new file mode 100644 (file)

index 0000000..c347342
--- /dev/null
+++ b/queue-3.10/rbd-fix-error-handling-from-rbd_snap_name.patch
@@ -0,0 +1,63 @@
+From da6a6b63978d45f9ae582d1f362f182012da3a22 Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Wed, 4 Sep 2013 17:57:31 -0700
+Subject: rbd: fix error handling from rbd_snap_name()
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit da6a6b63978d45f9ae582d1f362f182012da3a22 upstream.
+
+rbd_snap_name() calls rbd_dev_v{1,2}_snap_name() depending on the
+format of the image. The format 1 version returns NULL on error, which
+is handled by the caller. The format 2 version returns an ERR_PTR,
+which the caller of rbd_snap_name() does not expect.
+
+Fortunately this is unlikely to occur in practice because
+rbd_snap_id_by_name() is called before rbd_snap_name(). This would hit
+similar errors to rbd_snap_name() (like the snapshot not existing) and
+return early, so rbd_snap_name() would not hit an error unless the
+snapshot was removed between the two calls or memory was exhausted.
+
+Use an ERR_PTR in rbd_dev_v1_snap_name() so that the specific error
+can be propagated, and it is consistent with rbd_dev_v2_snap_name().
+Handle the ERR_PTR in the only rbd_snap_name() caller.
+
+Suggested-by: Alex Elder <alex.elder@linaro.org>
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -937,12 +937,14 @@ static const char *rbd_dev_v1_snap_name(
+                                       u64 snap_id)
+ {
+       u32 which;
++      const char *snap_name;
+ 
+       which = rbd_dev_snap_index(rbd_dev, snap_id);
+       if (which == BAD_SNAP_INDEX)
+-              return NULL;
++              return ERR_PTR(-ENOENT);
+ 
+-      return _rbd_dev_v1_snap_name(rbd_dev, which);
++      snap_name = _rbd_dev_v1_snap_name(rbd_dev, which);
++      return snap_name ? snap_name : ERR_PTR(-ENOMEM);
+ }
+ 
+ static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
+@@ -4140,8 +4142,8 @@ static int rbd_dev_spec_update(struct rb
+       /* Look up the snapshot name, and make a copy */
+ 
+       snap_name = rbd_snap_name(rbd_dev, spec->snap_id);
+-      if (!snap_name) {
+-              ret = -ENOMEM;
++      if (IS_ERR(snap_name)) {
++              ret = PTR_ERR(snap_name);
+               goto out_err;
+       }
+ 
diff --git a/queue-3.10/rbd-fix-null-dereference-in-dout.patch b/queue-3.10/rbd-fix-null-dereference-in-dout.patch

new file mode 100644 (file)

index 0000000..0ef0cec
--- /dev/null
+++ b/queue-3.10/rbd-fix-null-dereference-in-dout.patch
@@ -0,0 +1,42 @@
+From c35455791c1131e7ccbf56ea6fbdd562401c2ce2 Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Wed, 28 Aug 2013 17:08:10 -0700
+Subject: rbd: fix null dereference in dout
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit c35455791c1131e7ccbf56ea6fbdd562401c2ce2 upstream.
+
+The order parameter is sometimes NULL in _rbd_dev_v2_snap_size(), but
+the dout() always dereferences it. Move this to another dout() protected
+by a check that order is non-NULL.
+
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <alex.elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -3712,12 +3712,14 @@ static int _rbd_dev_v2_snap_size(struct
+       if (ret < sizeof (size_buf))
+               return -ERANGE;
+ 
+-      if (order)
++      if (order) {
+               *order = size_buf.order;
++              dout("  order %u", (unsigned int)*order);
++      }
+       *snap_size = le64_to_cpu(size_buf.size);
+ 
+-      dout("  snap_id 0x%016llx order = %u, snap_size = %llu\n",
+-              (unsigned long long)snap_id, (unsigned int)*order,
++      dout("  snap_id 0x%016llx snap_size = %llu\n",
++              (unsigned long long)snap_id,
+               (unsigned long long)*snap_size);
+ 
+       return 0;
diff --git a/queue-3.10/rbd-fix-use-after-free-of-rbd_dev-disk.patch b/queue-3.10/rbd-fix-use-after-free-of-rbd_dev-disk.patch

new file mode 100644 (file)

index 0000000..772e958
--- /dev/null
+++ b/queue-3.10/rbd-fix-use-after-free-of-rbd_dev-disk.patch
@@ -0,0 +1,103 @@
+From 9875201e10496612080e7d164acc8f625c18725c Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Thu, 29 Aug 2013 17:26:31 -0700
+Subject: rbd: fix use-after free of rbd_dev->disk
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit 9875201e10496612080e7d164acc8f625c18725c upstream.
+
+Removing a device deallocates the disk, unschedules the watch, and
+finally cleans up the rbd_dev structure. rbd_dev_refresh(), called
+from the watch callback, updates the disk size and rbd_dev
+structure. With no locking between them, rbd_dev_refresh() may use the
+device or rbd_dev after they've been freed.
+
+To fix this, check whether RBD_DEV_FLAG_REMOVING is set before
+updating the disk size in rbd_dev_refresh(). In order to prevent a
+race where rbd_dev_refresh() is already revalidating the disk when
+rbd_remove() is called, move the call to rbd_bus_del_dev() after the
+watch is unregistered and all notifies are complete. It's safe to
+defer deleting this structure because no new requests can be submitted
+once the RBD_DEV_FLAG_REMOVING is set, since the device cannot be
+opened.
+
+Fixes: http://tracker.ceph.com/issues/5636
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c |   40 +++++++++++++++++++++++++++++++++-------
+ 1 file changed, 33 insertions(+), 7 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -3336,6 +3336,31 @@ static void rbd_exists_validate(struct r
+               clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+ }
+ 
++static void rbd_dev_update_size(struct rbd_device *rbd_dev)
++{
++      sector_t size;
++      bool removing;
++
++      /*
++       * Don't hold the lock while doing disk operations,
++       * or lock ordering will conflict with the bdev mutex via:
++       * rbd_add() -> blkdev_get() -> rbd_open()
++       */
++      spin_lock_irq(&rbd_dev->lock);
++      removing = test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
++      spin_unlock_irq(&rbd_dev->lock);
++      /*
++       * If the device is being removed, rbd_dev->disk has
++       * been destroyed, so don't try to update its size
++       */
++      if (!removing) {
++              size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
++              dout("setting size to %llu sectors", (unsigned long long)size);
++              set_capacity(rbd_dev->disk, size);
++              revalidate_disk(rbd_dev->disk);
++      }
++}
++
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ {
+       u64 mapping_size;
+@@ -3354,12 +3379,7 @@ static int rbd_dev_refresh(struct rbd_de
+       rbd_exists_validate(rbd_dev);
+       mutex_unlock(&ctl_mutex);
+       if (mapping_size != rbd_dev->mapping.size) {
+-              sector_t size;
+-
+-              size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
+-              dout("setting size to %llu sectors", (unsigned long long)size);
+-              set_capacity(rbd_dev->disk, size);
+-              revalidate_disk(rbd_dev->disk);
++              rbd_dev_update_size(rbd_dev);
+       }
+ 
+       return ret;
+@@ -5147,7 +5167,6 @@ static ssize_t rbd_remove(struct bus_typ
+       if (ret < 0 || already)
+               goto done;
+ 
+-      rbd_bus_del_dev(rbd_dev);
+       ret = rbd_dev_header_watch_sync(rbd_dev, false);
+       if (ret)
+               rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret);
+@@ -5158,6 +5177,13 @@ static ssize_t rbd_remove(struct bus_typ
+        */
+       dout("%s: flushing notifies", __func__);
+       ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
++      /*
++       * Don't free anything from rbd_dev->disk until after all
++       * notifies are completely processed. Otherwise
++       * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting
++       * in a potential use after free of rbd_dev->disk or rbd_dev.
++       */
++      rbd_bus_del_dev(rbd_dev);
+       rbd_dev_image_release(rbd_dev);
+       module_put(THIS_MODULE);
+       ret = count;
diff --git a/queue-3.10/rbd-flush-dcache-after-zeroing-page-data.patch b/queue-3.10/rbd-flush-dcache-after-zeroing-page-data.patch

new file mode 100644 (file)

index 0000000..219a3cf
--- /dev/null
+++ b/queue-3.10/rbd-flush-dcache-after-zeroing-page-data.patch
@@ -0,0 +1,43 @@
+From e215605417b87732c6debf65da6d953016a1e5bc Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 22 May 2013 20:54:25 -0500
+Subject: rbd: flush dcache after zeroing page data
+
+From: Alex Elder <elder@inktank.com>
+
+commit e215605417b87732c6debf65da6d953016a1e5bc upstream.
+
+Neither zero_bio_chain() nor zero_pages() contains a call to flush
+caches after zeroing a portion of a page.  This can cause problems
+on architectures that have caches that allow virtual address
+aliasing.
+
+This resolves:
+    http://tracker.ceph.com/issues/4777
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -1126,6 +1126,7 @@ static void zero_bio_chain(struct bio *c
+                               buf = bvec_kmap_irq(bv, &flags);
+                               memset(buf + remainder, 0,
+                                      bv->bv_len - remainder);
++                              flush_dcache_page(bv->bv_page);
+                               bvec_kunmap_irq(buf, &flags);
+                       }
+                       pos += bv->bv_len;
+@@ -1158,6 +1159,7 @@ static void zero_pages(struct page **pag
+               local_irq_save(flags);
+               kaddr = kmap_atomic(*page);
+               memset(kaddr + page_offset, 0, length);
++              flush_dcache_page(*page);
+               kunmap_atomic(kaddr);
+               local_irq_restore(flags);
+ 
diff --git a/queue-3.10/rbd-ignore-unmapped-snapshots-that-no-longer-exist.patch b/queue-3.10/rbd-ignore-unmapped-snapshots-that-no-longer-exist.patch

new file mode 100644 (file)

index 0000000..3f98719
--- /dev/null
+++ b/queue-3.10/rbd-ignore-unmapped-snapshots-that-no-longer-exist.patch
@@ -0,0 +1,40 @@
+From efadc98aab674153709cc357ba565f04e3164fcd Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Thu, 29 Aug 2013 19:16:42 -0700
+Subject: rbd: ignore unmapped snapshots that no longer exist
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit efadc98aab674153709cc357ba565f04e3164fcd upstream.
+
+This prevents erroring out while adding a device when a snapshot
+unrelated to the current mapping is deleted between reading the
+snapshot context and reading the snapshot names. If the mapped
+snapshot name is not found an error still occurs as usual.
+
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -4055,8 +4055,13 @@ static u64 rbd_v2_snap_id_by_name(struct
+ 
+               snap_id = snapc->snaps[which];
+               snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id);
+-              if (IS_ERR(snap_name))
+-                      break;
++              if (IS_ERR(snap_name)) {
++                      /* ignore no-longer existing snapshots */
++                      if (PTR_ERR(snap_name) == -ENOENT)
++                              continue;
++                      else
++                              break;
++              }
+               found = !strcmp(name, snap_name);
+               kfree(snap_name);
+       }
diff --git a/queue-3.10/rbd-make-rbd_obj_notify_ack-synchronous.patch b/queue-3.10/rbd-make-rbd_obj_notify_ack-synchronous.patch

new file mode 100644 (file)

index 0000000..5736d39
--- /dev/null
+++ b/queue-3.10/rbd-make-rbd_obj_notify_ack-synchronous.patch
@@ -0,0 +1,70 @@
+From 20e0af67ce88c657d0601977b9941a2256afbdaa Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Thu, 29 Aug 2013 17:36:03 -0700
+Subject: rbd: make rbd_obj_notify_ack() synchronous
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit 20e0af67ce88c657d0601977b9941a2256afbdaa upstream.
+
+The only user of rbd_obj_notify_ack() is rbd_watch_cb(). It is used
+asynchronously with no tracking of when the notify ack completes, so
+it may still be in progress when the osd_client is shut down.  This
+results in a BUG() since the osd client assumes no requests are in
+flight when it stops. Since all notifies are flushed before the
+osd_client is stopped, waiting for the notify ack to complete before
+returning from the watch callback ensures there are no notify acks in
+flight during shutdown.
+
+Rename rbd_obj_notify_ack() to rbd_obj_notify_ack_sync() to reflect
+its new synchronous nature.
+
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -2819,7 +2819,7 @@ out_err:
+       obj_request_done_set(obj_request);
+ }
+ 
+-static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id)
++static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id)
+ {
+       struct rbd_obj_request *obj_request;
+       struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+@@ -2834,16 +2834,17 @@ static int rbd_obj_notify_ack(struct rbd
+       obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request);
+       if (!obj_request->osd_req)
+               goto out;
+-      obj_request->callback = rbd_obj_request_put;
+ 
+       osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK,
+                                       notify_id, 0, 0);
+       rbd_osd_req_format_read(obj_request);
+ 
+       ret = rbd_obj_request_submit(osdc, obj_request);
+-out:
+       if (ret)
+-              rbd_obj_request_put(obj_request);
++              goto out;
++      ret = rbd_obj_request_wait(obj_request);
++out:
++      rbd_obj_request_put(obj_request);
+ 
+       return ret;
+ }
+@@ -2863,7 +2864,7 @@ static void rbd_watch_cb(u64 ver, u64 no
+       if (ret)
+               rbd_warn(rbd_dev, ": header refresh error (%d)\n", ret);
+ 
+-      rbd_obj_notify_ack(rbd_dev, notify_id);
++      rbd_obj_notify_ack_sync(rbd_dev, notify_id);
+ }
+ 
+ /*
diff --git a/queue-3.10/rbd-protect-against-concurrent-unmaps.patch b/queue-3.10/rbd-protect-against-concurrent-unmaps.patch

new file mode 100644 (file)

index 0000000..48d386d
--- /dev/null
+++ b/queue-3.10/rbd-protect-against-concurrent-unmaps.patch
@@ -0,0 +1,46 @@
+From 82a442d239695a242c4d584464c9606322cd02aa Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Fri, 31 May 2013 17:40:44 -0500
+Subject: rbd: protect against concurrent unmaps
+
+From: Alex Elder <elder@inktank.com>
+
+commit 82a442d239695a242c4d584464c9606322cd02aa upstream.
+
+Make sure two concurrent unmap operations on the same rbd device
+won't collide, by only proceeding with the removal and cleanup of a
+device if it is not already underway.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -5108,6 +5108,7 @@ static ssize_t rbd_remove(struct bus_typ
+       struct list_head *tmp;
+       int dev_id;
+       unsigned long ul;
++      bool already = false;
+       int ret;
+ 
+       ret = strict_strtoul(buf, 10, &ul);
+@@ -5135,11 +5136,12 @@ static ssize_t rbd_remove(struct bus_typ
+               if (rbd_dev->open_count)
+                       ret = -EBUSY;
+               else
+-                      set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
++                      already = test_and_set_bit(RBD_DEV_FLAG_REMOVING,
++                                                      &rbd_dev->flags);
+               spin_unlock_irq(&rbd_dev->lock);
+       }
+       spin_unlock(&rbd_dev_list_lock);
+-      if (ret < 0)
++      if (ret < 0 || already)
+               goto done;
+ 
+       rbd_bus_del_dev(rbd_dev);
diff --git a/queue-3.10/rbd-set-removing-flag-while-holding-list-lock.patch b/queue-3.10/rbd-set-removing-flag-while-holding-list-lock.patch

new file mode 100644 (file)

index 0000000..1ee576f
--- /dev/null
+++ b/queue-3.10/rbd-set-removing-flag-while-holding-list-lock.patch
@@ -0,0 +1,112 @@
+From 751cc0e3cfabdda87c4c21519253c6751e97a8d4 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Fri, 31 May 2013 15:17:01 -0500
+Subject: rbd: set removing flag while holding list lock
+
+From: Alex Elder <elder@inktank.com>
+
+commit 751cc0e3cfabdda87c4c21519253c6751e97a8d4 upstream.
+
+When unmapping a device, its id is supplied, and that is used to
+look up which rbd device should be unmapped.  Looking up the
+device involves searching the rbd device list while holding
+a spinlock that protects access to that list.
+
+Currently all of this is done under protection of the control lock,
+but that protection is going away soon.  To ensure the rbd_dev is
+still valid (still on the list) while setting its REMOVING flag, do
+so while still holding the list lock.  To do so, get rid of
+__rbd_get_dev(), and open code what it did in the one place it
+was used.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c |   53 +++++++++++++++++++++-------------------------------
+ 1 file changed, 22 insertions(+), 31 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -5061,23 +5061,6 @@ err_out_module:
+       return (ssize_t)rc;
+ }
+ 
+-static struct rbd_device *__rbd_get_dev(unsigned long dev_id)
+-{
+-      struct list_head *tmp;
+-      struct rbd_device *rbd_dev;
+-
+-      spin_lock(&rbd_dev_list_lock);
+-      list_for_each(tmp, &rbd_dev_list) {
+-              rbd_dev = list_entry(tmp, struct rbd_device, node);
+-              if (rbd_dev->dev_id == dev_id) {
+-                      spin_unlock(&rbd_dev_list_lock);
+-                      return rbd_dev;
+-              }
+-      }
+-      spin_unlock(&rbd_dev_list_lock);
+-      return NULL;
+-}
+-
+ static void rbd_dev_device_release(struct device *dev)
+ {
+       struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+@@ -5122,7 +5105,8 @@ static ssize_t rbd_remove(struct bus_typ
+                         size_t count)
+ {
+       struct rbd_device *rbd_dev = NULL;
+-      int target_id;
++      struct list_head *tmp;
++      int dev_id;
+       unsigned long ul;
+       int ret;
+ 
+@@ -5131,26 +5115,33 @@ static ssize_t rbd_remove(struct bus_typ
+               return ret;
+ 
+       /* convert to int; abort if we lost anything in the conversion */
+-      target_id = (int) ul;
+-      if (target_id != ul)
++      dev_id = (int)ul;
++      if (dev_id != ul)
+               return -EINVAL;
+ 
+       mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ 
+-      rbd_dev = __rbd_get_dev(target_id);
+-      if (!rbd_dev) {
+-              ret = -ENOENT;
+-              goto done;
++      ret = -ENOENT;
++      spin_lock(&rbd_dev_list_lock);
++      list_for_each(tmp, &rbd_dev_list) {
++              rbd_dev = list_entry(tmp, struct rbd_device, node);
++              if (rbd_dev->dev_id == dev_id) {
++                      ret = 0;
++                      break;
++              }
+       }
+-
+-      spin_lock_irq(&rbd_dev->lock);
+-      if (rbd_dev->open_count)
+-              ret = -EBUSY;
+-      else
+-              set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
+-      spin_unlock_irq(&rbd_dev->lock);
++      if (!ret) {
++              spin_lock_irq(&rbd_dev->lock);
++              if (rbd_dev->open_count)
++                      ret = -EBUSY;
++              else
++                      set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
++              spin_unlock_irq(&rbd_dev->lock);
++      }
++      spin_unlock(&rbd_dev_list_lock);
+       if (ret < 0)
+               goto done;
++
+       rbd_bus_del_dev(rbd_dev);
+       ret = rbd_dev_header_watch_sync(rbd_dev, false);
+       if (ret)
diff --git a/queue-3.10/series b/queue-3.10/series

index 71f5fb7a9a14f2c03094ad6c3785b6a3c4a0a2ac..c242eabc928754b1f2a1259e42cffcb5047be291 100644 (file)
--- a/queue-3.10/series
+++ b/queue-3.10/series
@@ -89,3 +89,30 @@ sh-always-link-in-helper-functions-extracted-from-libgcc.patch
  selinux-look-for-ipsec-labels-on-both-inbound-and-outbound-packets.patch
  selinux-process-labeled-ipsec-tcp-syn-ack-packets-properly-in-selinux_ip_postroute.patch
  clocksource-dw_apb_timer_of-fix-read_sched_clock.patch
+ceph-improve-error-handling-in-ceph_mdsmap_decode.patch
+libceph-add-lingering-request-reference-when-registered.patch
+rbd-flush-dcache-after-zeroing-page-data.patch
+rbd-set-removing-flag-while-holding-list-lock.patch
+rbd-protect-against-concurrent-unmaps.patch
+libceph-fix-safe-completion.patch
+libceph-fix-truncate-size-calculation.patch
+rbd-fix-a-couple-warnings.patch
+ceph-free-mdsc-if-alloc-mdsc-mdsmap-failed.patch
+ceph-avoid-accessing-invalid-memory.patch
+libceph-call-r_unsafe_callback-when-unsafe-reply-is-received.patch
+ceph-fix-null-pointer-dereference.patch
+ceph-cleanup-types-in-striped_read.patch
+ceph-add-check-returned-value-on-func-ceph_calc_ceph_pg.patch
+libceph-fix-error-handling-in-handle_reply.patch
+libceph-potential-null-dereference-in-ceph_osdc_handle_map.patch
+libceph-create_singlethread_workqueue-doesn-t-return-err_ptrs.patch
+ceph-fix-bugs-about-handling-short-read-for-sync-read-mode.patch
+ceph-allow-sync_read-write-return-partial-successed-size-of-read-write.patch
+rbd-fix-buffer-size-for-writes-to-images-with-snapshots.patch
+rbd-fix-null-dereference-in-dout.patch
+libceph-add-function-to-ensure-notifies-are-complete.patch
+rbd-complete-notifies-before-cleaning-up-osd_client-and-rbd_dev.patch
+rbd-make-rbd_obj_notify_ack-synchronous.patch
+rbd-fix-use-after-free-of-rbd_dev-disk.patch
+rbd-ignore-unmapped-snapshots-that-no-longer-exist.patch
+rbd-fix-error-handling-from-rbd_snap_name.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 6 Jan 2014 22:09:24 +0000 (14:09 -0800)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 6 Jan 2014 22:09:24 +0000 (14:09 -0800)
queue-3.10/ceph-add-check-returned-value-on-func-ceph_calc_ceph_pg.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/ceph-allow-sync_read-write-return-partial-successed-size-of-read-write.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/ceph-avoid-accessing-invalid-memory.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/ceph-cleanup-types-in-striped_read.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/ceph-fix-bugs-about-handling-short-read-for-sync-read-mode.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/ceph-fix-null-pointer-dereference.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/ceph-free-mdsc-if-alloc-mdsc-mdsmap-failed.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/ceph-improve-error-handling-in-ceph_mdsmap_decode.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/libceph-add-function-to-ensure-notifies-are-complete.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/libceph-add-lingering-request-reference-when-registered.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/libceph-call-r_unsafe_callback-when-unsafe-reply-is-received.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/libceph-create_singlethread_workqueue-doesn-t-return-err_ptrs.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/libceph-fix-error-handling-in-handle_reply.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/libceph-fix-safe-completion.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/libceph-fix-truncate-size-calculation.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/libceph-potential-null-dereference-in-ceph_osdc_handle_map.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/rbd-complete-notifies-before-cleaning-up-osd_client-and-rbd_dev.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/rbd-fix-a-couple-warnings.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/rbd-fix-buffer-size-for-writes-to-images-with-snapshots.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/rbd-fix-error-handling-from-rbd_snap_name.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/rbd-fix-null-dereference-in-dout.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/rbd-fix-use-after-free-of-rbd_dev-disk.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/rbd-flush-dcache-after-zeroing-page-data.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/rbd-ignore-unmapped-snapshots-that-no-longer-exist.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/rbd-make-rbd_obj_notify_ack-synchronous.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/rbd-protect-against-concurrent-unmaps.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/rbd-set-removing-flag-while-holding-list-lock.patch	[new file with mode: 0644]	patch \| blob
queue-3.10/series		patch \| blob \| blame \| history