--- /dev/null
+From 2fbcbff1d6b9243ef71c64a8ab993bc3c7bb7af1 Mon Sep 17 00:00:00 2001
+From: majianpeng <majianpeng@gmail.com>
+Date: Fri, 2 Aug 2013 18:14:48 +0800
+Subject: ceph: Add check returned value on func ceph_calc_ceph_pg.
+
+From: majianpeng <majianpeng@gmail.com>
+
+commit 2fbcbff1d6b9243ef71c64a8ab993bc3c7bb7af1 upstream.
+
+ceph_calc_ceph_pg() may fail, so check its return value.
+
+Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/ioctl.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/fs/ceph/ioctl.c
++++ b/fs/ceph/ioctl.c
+@@ -211,8 +211,12 @@ static long ceph_ioctl_get_dataloc(struc
+ snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
+ ceph_ino(inode), dl.object_no);
+
+- ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
+- ceph_file_layout_pg_pool(ci->i_layout));
++ r = ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
++ ceph_file_layout_pg_pool(ci->i_layout));
++ if (r < 0) {
++ up_read(&osdc->map_sem);
++ return r;
++ }
+
+ dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
+ if (dl.osd >= 0) {
--- /dev/null
+From ee7289bfadda5f4ef60884547ebc9989c8fb314a Mon Sep 17 00:00:00 2001
+From: majianpeng <majianpeng@gmail.com>
+Date: Wed, 21 Aug 2013 15:02:51 +0800
+Subject: ceph: allow sync_read/write return partial successed size of read/write.
+
+From: majianpeng <majianpeng@gmail.com>
+
+commit ee7289bfadda5f4ef60884547ebc9989c8fb314a upstream.
+
+sync_read/write may perform operations on multiple stripes. If one of those
+hits an error, we return the size that already succeeded rather than an error
+value. The exception is a write that hits -EOLDSNAPC: if this occurs, we
+retry the whole write again.
+
+Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/file.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -373,7 +373,7 @@ more:
+ goto more;
+ }
+
+- if (ret >= 0) {
++ if (read > 0) {
+ ret = read;
+ /* did we bounce off eof? */
+ if (pos + left > inode->i_size)
+@@ -611,6 +611,8 @@ out:
+ if (check_caps)
+ ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY,
+ NULL);
++ } else if (ret != -EOLDSNAPC && written > 0) {
++ ret = written;
+ }
+ return ret;
+ }
--- /dev/null
+From 5446429630257f4723829409337a26c076907d5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sasha.levin@oracle.com>
+Date: Mon, 1 Jul 2013 18:33:39 -0400
+Subject: ceph: avoid accessing invalid memory
+
+From: Sasha Levin <sasha.levin@oracle.com>
+
+commit 5446429630257f4723829409337a26c076907d5d upstream.
+
+When mounting ceph with a dev name that starts with a slash, ceph
+would attempt to access the character before that slash. Since we
+don't actually own that byte of memory, we would trigger an
+invalid access:
+
+[ 43.499934] BUG: unable to handle kernel paging request at ffff880fa3a97fff
+[ 43.500984] IP: [<ffffffff818f3884>] parse_mount_options+0x1a4/0x300
+[ 43.501491] PGD 743b067 PUD 10283c4067 PMD 10282a6067 PTE 8000000fa3a97060
+[ 43.502301] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
+[ 43.503006] Dumping ftrace buffer:
+[ 43.503596] (ftrace buffer empty)
+[ 43.504046] CPU: 0 PID: 10879 Comm: mount Tainted: G W 3.10.0-sasha #1129
+[ 43.504851] task: ffff880fa625b000 ti: ffff880fa3412000 task.ti: ffff880fa3412000
+[ 43.505608] RIP: 0010:[<ffffffff818f3884>] [<ffffffff818f3884>] parse_mount_options$
+[ 43.506552] RSP: 0018:ffff880fa3413d08 EFLAGS: 00010286
+[ 43.507133] RAX: ffff880fa3a98000 RBX: ffff880fa3a98000 RCX: 0000000000000000
+[ 43.507893] RDX: ffff880fa3a98001 RSI: 000000000000002f RDI: ffff880fa3a98000
+[ 43.508610] RBP: ffff880fa3413d58 R08: 0000000000001f99 R09: ffff880fa3fe64c0
+[ 43.509426] R10: ffff880fa3413d98 R11: ffff880fa38710d8 R12: ffff880fa3413da0
+[ 43.509792] R13: ffff880fa3a97fff R14: 0000000000000000 R15: ffff880fa3413d90
+[ 43.509792] FS: 00007fa9c48757e0(0000) GS:ffff880fd2600000(0000) knlGS:000000000000$
+[ 43.509792] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+[ 43.509792] CR2: ffff880fa3a97fff CR3: 0000000fa3bb9000 CR4: 00000000000006b0
+[ 43.509792] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 43.509792] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
+[ 43.509792] Stack:
+[ 43.509792] 0000e5180000000e ffffffff85ca1900 ffff880fa38710d8 ffff880fa3413d98
+[ 43.509792] 0000000000000120 0000000000000000 ffff880fa3a98000 0000000000000000
+[ 43.509792] ffffffff85cf32a0 0000000000000000 ffff880fa3413dc8 ffffffff818f3c72
+[ 43.509792] Call Trace:
+[ 43.509792] [<ffffffff818f3c72>] ceph_mount+0xa2/0x390
+[ 43.509792] [<ffffffff81226314>] ? pcpu_alloc+0x334/0x3c0
+[ 43.509792] [<ffffffff81282f8d>] mount_fs+0x8d/0x1a0
+[ 43.509792] [<ffffffff812263d0>] ? __alloc_percpu+0x10/0x20
+[ 43.509792] [<ffffffff8129f799>] vfs_kern_mount+0x79/0x100
+[ 43.509792] [<ffffffff812a224d>] do_new_mount+0xcd/0x1c0
+[ 43.509792] [<ffffffff812a2e8d>] do_mount+0x15d/0x210
+[ 43.509792] [<ffffffff81220e55>] ? strndup_user+0x45/0x60
+[ 43.509792] [<ffffffff812a2fdd>] SyS_mount+0x9d/0xe0
+[ 43.509792] [<ffffffff83fd816c>] tracesys+0xdd/0xe2
+[ 43.509792] Code: 4c 8b 5d c0 74 0a 48 8d 50 01 49 89 14 24 eb 17 31 c0 48 83 c9 ff $
+[ 43.509792] RIP [<ffffffff818f3884>] parse_mount_options+0x1a4/0x300
+[ 43.509792] RSP <ffff880fa3413d08>
+[ 43.509792] CR2: ffff880fa3a97fff
+[ 43.509792] ---[ end trace 22469cd81e93af51 ]---
+
+Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
+Reviewed-by: Sage Weil <sage@inktan.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/super.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ceph/super.c
++++ b/fs/ceph/super.c
+@@ -357,7 +357,7 @@ static int parse_mount_options(struct ce
+ }
+ err = -EINVAL;
+ dev_name_end--; /* back up to ':' separator */
+- if (*dev_name_end != ':') {
++ if (dev_name_end < dev_name || *dev_name_end != ':') {
+ pr_err("device name is missing path (no : separator in %s)\n",
+ dev_name);
+ goto out;
--- /dev/null
+From 688bac461ba3e9d221a879ab40b687f5d7b5b19c Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Tue, 23 Jul 2013 16:48:01 +0300
+Subject: ceph: cleanup types in striped_read()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit 688bac461ba3e9d221a879ab40b687f5d7b5b19c upstream.
+
+We pass in a u64 value for "len" and then immediately truncate away the
+upper 32 bits.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <alex.elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/file.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -313,9 +313,9 @@ static int striped_read(struct inode *in
+ {
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+- u64 pos, this_len;
++ u64 pos, this_len, left;
+ int io_align, page_align;
+- int left, pages_left;
++ int pages_left;
+ int read;
+ struct page **page_pos;
+ int ret;
+@@ -346,7 +346,7 @@ more:
+ ret = 0;
+ hit_stripe = this_len < left;
+ was_short = ret >= 0 && ret < this_len;
+- dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read,
++ dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
+ ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
+
+ if (ret > 0) {
+@@ -378,7 +378,7 @@ more:
+ if (pos + left > inode->i_size)
+ left = inode->i_size - pos;
+
+- dout("zero tail %d\n", left);
++ dout("zero tail %llu\n", left);
+ ceph_zero_page_vector_range(page_align + read, left,
+ pages);
+ read += left;
--- /dev/null
+From 02ae66d8b229708fd94b764f6c17ead1c7741fcf Mon Sep 17 00:00:00 2001
+From: majianpeng <majianpeng@gmail.com>
+Date: Tue, 6 Aug 2013 16:20:38 +0800
+Subject: ceph: fix bugs about handling short-read for sync read mode.
+
+From: majianpeng <majianpeng@gmail.com>
+
+commit 02ae66d8b229708fd94b764f6c17ead1c7741fcf upstream.
+
+cephfs . show_layout
+>layout.data_pool: 0
+>layout.object_size: 4194304
+>layout.stripe_unit: 4194304
+>layout.stripe_count: 1
+
+TestA:
+>dd if=/dev/urandom of=test bs=1M count=2 oflag=direct
+>dd if=/dev/urandom of=test bs=1M count=2 seek=4 oflag=direct
+>dd if=test of=/dev/null bs=6M count=1 iflag=direct
+The messages from func striped_read are:
+ceph: file.c:350 : striped_read 0~6291456 (read 0) got 2097152 HITSTRIPE SHORT
+ceph: file.c:350 : striped_read 2097152~4194304 (read 2097152) got 0 HITSTRIPE SHORT
+ceph: file.c:381 : zero tail 4194304
+ceph: file.c:390 : striped_read returns 6291456
+The hole in the file is from 2M to 4M, but it actually zeroes the last 4M,
+including the last 2M area, which isn't a hole.
+Using this patch, the messages are:
+ceph: file.c:350 : striped_read 0~6291456 (read 0) got 2097152 HITSTRIPE SHORT
+ceph: file.c:358 : zero gap 2097152 to 4194304
+ceph: file.c:350 : striped_read 4194304~2097152 (read 4194304) got 2097152
+ceph: file.c:384 : striped_read returns 6291456
+
+TestB:
+>echo majianpeng > test
+>dd if=test of=/dev/null bs=2M count=1 iflag=direct
+The messages are:
+ceph: file.c:350 : striped_read 0~6291456 (read 0) got 11 HITSTRIPE SHORT
+ceph: file.c:350 : striped_read 11~6291445 (read 11) got 0 HITSTRIPE SHORT
+ceph: file.c:390 : striped_read returns 11
+In this case it performed one more striped_read, which is pointless.
+Using this patch, the messages are:
+ceph: file.c:350 : striped_read 0~6291456 (read 0) got 11 HITSTRIPE SHORT
+ceph: file.c:384 : striped_read returns 11
+
+Big thanks to Yan Zheng for the patch.
+
+Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>
+Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/file.c | 39 ++++++++++++++++-----------------------
+ 1 file changed, 16 insertions(+), 23 deletions(-)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -349,44 +349,37 @@ more:
+ dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
+ ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
+
+- if (ret > 0) {
+- int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
+-
+- if (read < pos - off) {
+- dout(" zero gap %llu to %llu\n", off + read, pos);
+- ceph_zero_page_vector_range(page_align + read,
+- pos - off - read, pages);
++ if (ret >= 0) {
++ int didpages;
++ if (was_short && (pos + ret < inode->i_size)) {
++ u64 tmp = min(this_len - ret,
++ inode->i_size - pos - ret);
++ dout(" zero gap %llu to %llu\n",
++ pos + ret, pos + ret + tmp);
++ ceph_zero_page_vector_range(page_align + read + ret,
++ tmp, pages);
++ ret += tmp;
+ }
++
++ didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
+ pos += ret;
+ read = pos - off;
+ left -= ret;
+ page_pos += didpages;
+ pages_left -= didpages;
+
+- /* hit stripe? */
+- if (left && hit_stripe)
++ /* hit stripe and need continue*/
++ if (left && hit_stripe && pos < inode->i_size)
+ goto more;
+ }
+
+- if (was_short) {
++ if (ret >= 0) {
++ ret = read;
+ /* did we bounce off eof? */
+ if (pos + left > inode->i_size)
+ *checkeof = 1;
+-
+- /* zero trailing bytes (inside i_size) */
+- if (left > 0 && pos < inode->i_size) {
+- if (pos + left > inode->i_size)
+- left = inode->i_size - pos;
+-
+- dout("zero tail %llu\n", left);
+- ceph_zero_page_vector_range(page_align + read, left,
+- pages);
+- read += left;
+- }
+ }
+
+- if (ret >= 0)
+- ret = read;
+ dout("striped_read returns %d\n", ret);
+ return ret;
+ }
--- /dev/null
+From c338c07c51e3106711fad5eb599e375eadb6855d Mon Sep 17 00:00:00 2001
+From: Nathaniel Yazdani <n1ght.4nd.d4y@gmail.com>
+Date: Sun, 4 Aug 2013 21:04:30 -0700
+Subject: ceph: fix null pointer dereference
+
+From: Nathaniel Yazdani <n1ght.4nd.d4y@gmail.com>
+
+commit c338c07c51e3106711fad5eb599e375eadb6855d upstream.
+
+When register_session() is given an out-of-range argument for mds,
+ceph_mdsmap_get_addr() will return a null pointer, which would be given to
+ceph_con_open() & be dereferenced, causing a kernel oops. This fixes bug #4685
+in the Ceph bug tracker <http://tracker.ceph.com/issues/4685>.
+
+Signed-off-by: Nathaniel Yazdani <n1ght.4nd.d4y@gmail.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/mds_client.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -414,6 +414,9 @@ static struct ceph_mds_session *register
+ {
+ struct ceph_mds_session *s;
+
++ if (mds >= mdsc->mdsmap->m_max_mds)
++ return ERR_PTR(-EINVAL);
++
+ s = kzalloc(sizeof(*s), GFP_NOFS);
+ if (!s)
+ return ERR_PTR(-ENOMEM);
--- /dev/null
+From fb3101b6f0db9ae3f35dc8e6ec908d0af8cdf12e Mon Sep 17 00:00:00 2001
+From: majianpeng <majianpeng@gmail.com>
+Date: Tue, 25 Jun 2013 14:48:19 +0800
+Subject: ceph: Free mdsc if alloc mdsc->mdsmap failed.
+
+From: majianpeng <majianpeng@gmail.com>
+
+commit fb3101b6f0db9ae3f35dc8e6ec908d0af8cdf12e upstream.
+
+Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/mds_client.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -3044,8 +3044,10 @@ int ceph_mdsc_init(struct ceph_fs_client
+ fsc->mdsc = mdsc;
+ mutex_init(&mdsc->mutex);
+ mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
+- if (mdsc->mdsmap == NULL)
++ if (mdsc->mdsmap == NULL) {
++ kfree(mdsc);
+ return -ENOMEM;
++ }
+
+ init_completion(&mdsc->safe_umount_waiters);
+ init_waitqueue_head(&mdsc->session_close_wq);
--- /dev/null
+From c213b50b7dcbf06abcfbf1e4eee5b76586718bd9 Mon Sep 17 00:00:00 2001
+From: Emil Goode <emilgoode@gmail.com>
+Date: Tue, 28 May 2013 16:59:00 +0200
+Subject: ceph: improve error handling in ceph_mdsmap_decode
+
+From: Emil Goode <emilgoode@gmail.com>
+
+commit c213b50b7dcbf06abcfbf1e4eee5b76586718bd9 upstream.
+
+This patch makes the following improvements to the error handling
+in the ceph_mdsmap_decode function:
+
+- Add a NULL check for return value from kcalloc
+- Make use of the variable err
+
+Signed-off-by: Emil Goode <emilgoode@gmail.com>
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/mdsmap.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ceph/mdsmap.c
++++ b/fs/ceph/mdsmap.c
+@@ -138,6 +138,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(v
+ m->m_info[mds].export_targets =
+ kcalloc(num_export_targets, sizeof(u32),
+ GFP_NOFS);
++ if (m->m_info[mds].export_targets == NULL)
++ goto badmem;
+ for (j = 0; j < num_export_targets; j++)
+ m->m_info[mds].export_targets[j] =
+ ceph_decode_32(&pexport_targets);
+@@ -170,7 +172,7 @@ bad:
+ DUMP_PREFIX_OFFSET, 16, 1,
+ start, end - start, true);
+ ceph_mdsmap_destroy(m);
+- return ERR_PTR(-EINVAL);
++ return ERR_PTR(err);
+ }
+
+ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
--- /dev/null
+From dd935f44a40f8fb02aff2cc0df2269c92422df1c Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Wed, 28 Aug 2013 21:43:09 -0700
+Subject: libceph: add function to ensure notifies are complete
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit dd935f44a40f8fb02aff2cc0df2269c92422df1c upstream.
+
+Without a way to flush the osd client's notify workqueue, a watch
+event that is unregistered could continue receiving callbacks
+indefinitely.
+
+Unregistering the event simply means no new notifies are added to the
+queue, but there may still be events in the queue that will call the
+watch callback for the event. If the queue is flushed after the event
+is unregistered, the caller can be sure no more watch callbacks will
+occur for the canceled watch.
+
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/ceph/osd_client.h | 2 ++
+ net/ceph/osd_client.c | 11 +++++++++++
+ 2 files changed, 13 insertions(+)
+
+--- a/include/linux/ceph/osd_client.h
++++ b/include/linux/ceph/osd_client.h
+@@ -335,6 +335,8 @@ extern int ceph_osdc_wait_request(struct
+ struct ceph_osd_request *req);
+ extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
+
++extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
++
+ extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
+ struct ceph_vino vino,
+ struct ceph_file_layout *layout,
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -2209,6 +2209,17 @@ void ceph_osdc_sync(struct ceph_osd_clie
+ EXPORT_SYMBOL(ceph_osdc_sync);
+
+ /*
++ * Call all pending notify callbacks - for use after a watch is
++ * unregistered, to make sure no more callbacks for it will be invoked
++ */
++extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
++{
++ flush_workqueue(osdc->notify_wq);
++}
++EXPORT_SYMBOL(ceph_osdc_flush_notifies);
++
++
++/*
+ * init, shutdown
+ */
+ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
--- /dev/null
+From 96e4dac66f69d28af2b736e723364efbbdf9fdee Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 22 May 2013 20:54:25 -0500
+Subject: libceph: add lingering request reference when registered
+
+From: Alex Elder <elder@inktank.com>
+
+commit 96e4dac66f69d28af2b736e723364efbbdf9fdee upstream.
+
+When an osd request is set to linger, the osd client holds onto the
+request so it can be re-submitted following certain osd map changes.
+The osd client holds a reference to the request until it is
+unregistered. This is used by rbd for watch requests.
+
+Currently, the reference is taken when the request is marked with
+the linger flag. This means that if an error occurs after that
+time but before the request completes successfully, that
+reference is leaked.
+
+There's really no reason to take the reference until the request is
+registered in the osd client's list of lingering requests, and
+that only happens when the lingering (watch) request completes
+successfully.
+
+So take that reference only when it gets registered following
+successful completion, and drop it (as before) when the request
+gets unregistered. This avoids the reference problem on error
+in rbd.
+
+Rearrange ceph_osdc_unregister_linger_request() to avoid using
+the request pointer after it may have been freed.
+
+And hold an extra reference in kick_requests() while handling
+a linger request that has not yet been registered, to ensure
+it doesn't go away.
+
+This resolves:
+ http://tracker.ceph.com/issues/3859
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c | 12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1174,6 +1174,7 @@ static void __register_linger_request(st
+ struct ceph_osd_request *req)
+ {
+ dout("__register_linger_request %p\n", req);
++ ceph_osdc_get_request(req);
+ list_add_tail(&req->r_linger_item, &osdc->req_linger);
+ if (req->r_osd)
+ list_add_tail(&req->r_linger_osd,
+@@ -1196,6 +1197,7 @@ static void __unregister_linger_request(
+ if (list_empty(&req->r_osd_item))
+ req->r_osd = NULL;
+ }
++ ceph_osdc_put_request(req);
+ }
+
+ void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
+@@ -1203,9 +1205,8 @@ void ceph_osdc_unregister_linger_request
+ {
+ mutex_lock(&osdc->request_mutex);
+ if (req->r_linger) {
+- __unregister_linger_request(osdc, req);
+ req->r_linger = 0;
+- ceph_osdc_put_request(req);
++ __unregister_linger_request(osdc, req);
+ }
+ mutex_unlock(&osdc->request_mutex);
+ }
+@@ -1217,11 +1218,6 @@ void ceph_osdc_set_request_linger(struct
+ if (!req->r_linger) {
+ dout("set_request_linger %p\n", req);
+ req->r_linger = 1;
+- /*
+- * caller is now responsible for calling
+- * unregister_linger_request
+- */
+- ceph_osdc_get_request(req);
+ }
+ }
+ EXPORT_SYMBOL(ceph_osdc_set_request_linger);
+@@ -1633,8 +1629,10 @@ static void kick_requests(struct ceph_os
+ dout("%p tid %llu restart on osd%d\n",
+ req, req->r_tid,
+ req->r_osd ? req->r_osd->o_osd : -1);
++ ceph_osdc_get_request(req);
+ __unregister_request(osdc, req);
+ __register_linger_request(osdc, req);
++ ceph_osdc_put_request(req);
+ continue;
+ }
+
--- /dev/null
+From 61c5d6bf7074ee32d014dcdf7698dc8c59eb712d Mon Sep 17 00:00:00 2001
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+Date: Mon, 24 Jun 2013 14:41:27 +0800
+Subject: libceph: call r_unsafe_callback when unsafe reply is received
+
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+
+commit 61c5d6bf7074ee32d014dcdf7698dc8c59eb712d upstream.
+
+We can't use !req->r_sent to check whether an OSD request is being sent
+for the first time, because __cancel_request() zeros req->r_sent
+when the OSD map changes. Rather than adding a new variable to struct
+ceph_osd_request to indicate whether it's sent for the first time, we
+can call the unsafe callback only when an unsafe OSD reply is received.
+If the OSD's first reply is safe, just skip calling the unsafe callback.
+
+The purpose of unsafe callback is adding unsafe request to a list,
+so that fsync(2) can wait for the safe reply. fsync(2) doesn't need
+to wait for a write(2) that hasn't returned yet. So it's OK to add
+request to the unsafe list when the first OSD reply is received.
+(ceph_sync_write() returns after receiving the first OSD reply)
+
+Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1337,10 +1337,6 @@ static void __send_request(struct ceph_o
+
+ ceph_msg_get(req->r_request); /* send consumes a ref */
+
+- /* Mark the request unsafe if this is the first timet's being sent. */
+-
+- if (!req->r_sent && req->r_unsafe_callback)
+- req->r_unsafe_callback(req, true);
+ req->r_sent = req->r_osd->o_incarnation;
+
+ ceph_con_send(&req->r_osd->o_con, req->r_request);
+@@ -1431,8 +1427,6 @@ static void handle_osds_timeout(struct w
+
+ static void complete_request(struct ceph_osd_request *req)
+ {
+- if (req->r_unsafe_callback)
+- req->r_unsafe_callback(req, false);
+ complete_all(&req->r_safe_completion); /* fsync waiter */
+ }
+
+@@ -1559,14 +1553,20 @@ static void handle_reply(struct ceph_osd
+ mutex_unlock(&osdc->request_mutex);
+
+ if (!already_completed) {
++ if (req->r_unsafe_callback &&
++ result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK))
++ req->r_unsafe_callback(req, true);
+ if (req->r_callback)
+ req->r_callback(req, msg);
+ else
+ complete_all(&req->r_completion);
+ }
+
+- if (flags & CEPH_OSD_FLAG_ONDISK)
++ if (flags & CEPH_OSD_FLAG_ONDISK) {
++ if (req->r_unsafe_callback && already_completed)
++ req->r_unsafe_callback(req, false);
+ complete_request(req);
++ }
+
+ done:
+ dout("req=%p req->r_linger=%d\n", req, req->r_linger);
--- /dev/null
+From dbcae088fa660086bde6e10d63bb3c9264832d85 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Thu, 15 Aug 2013 08:58:59 +0300
+Subject: libceph: create_singlethread_workqueue() doesn't return ERR_PTRs
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit dbcae088fa660086bde6e10d63bb3c9264832d85 upstream.
+
+create_singlethread_workqueue() returns NULL on error, and it doesn't
+return ERR_PTRs.
+
+I tweaked the error handling a little to be consistent with earlier in
+the function.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -2257,12 +2257,10 @@ int ceph_osdc_init(struct ceph_osd_clien
+ if (err < 0)
+ goto out_msgpool;
+
++ err = -ENOMEM;
+ osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
+- if (IS_ERR(osdc->notify_wq)) {
+- err = PTR_ERR(osdc->notify_wq);
+- osdc->notify_wq = NULL;
++ if (!osdc->notify_wq)
+ goto out_msgpool;
+- }
+ return 0;
+
+ out_msgpool:
--- /dev/null
+From 1874119664dafda3ef2ed9b51b4759a9540d4a1a Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Thu, 15 Aug 2013 08:51:58 +0300
+Subject: libceph: fix error handling in handle_reply()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit 1874119664dafda3ef2ed9b51b4759a9540d4a1a upstream.
+
+We've tried to fix the error paths in this function before, but there
+is still a hidden goto in the ceph_decode_need() macro which goes to the
+wrong place. We need to release the "req" and unlock a mutex before
+returning.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1488,14 +1488,14 @@ static void handle_reply(struct ceph_osd
+ dout("handle_reply %p tid %llu req %p result %d\n", msg, tid,
+ req, result);
+
+- ceph_decode_need(&p, end, 4, bad);
++ ceph_decode_need(&p, end, 4, bad_put);
+ numops = ceph_decode_32(&p);
+ if (numops > CEPH_OSD_MAX_OP)
+ goto bad_put;
+ if (numops != req->r_num_ops)
+ goto bad_put;
+ payload_len = 0;
+- ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad);
++ ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad_put);
+ for (i = 0; i < numops; i++) {
+ struct ceph_osd_op *op = p;
+ int len;
+@@ -1513,7 +1513,7 @@ static void handle_reply(struct ceph_osd
+ goto bad_put;
+ }
+
+- ceph_decode_need(&p, end, 4 + numops * 4, bad);
++ ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
+ retry_attempt = ceph_decode_32(&p);
+ for (i = 0; i < numops; i++)
+ req->r_reply_op_result[i] = ceph_decode_32(&p);
--- /dev/null
+From eb845ff13a44477f8a411baedbf11d678b9daf0a Mon Sep 17 00:00:00 2001
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+Date: Fri, 31 May 2013 15:54:44 +0800
+Subject: libceph: fix safe completion
+
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+
+commit eb845ff13a44477f8a411baedbf11d678b9daf0a upstream.
+
+handle_reply() calls complete_request() only if the first OSD reply
+has the ONDISK flag set. Call it for a later ONDISK reply as well.
+
+Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/ceph/osd_client.h | 1 -
+ net/ceph/osd_client.c | 17 ++++++++---------
+ 2 files changed, 8 insertions(+), 10 deletions(-)
+
+--- a/include/linux/ceph/osd_client.h
++++ b/include/linux/ceph/osd_client.h
+@@ -145,7 +145,6 @@ struct ceph_osd_request {
+ s32 r_reply_op_result[CEPH_OSD_MAX_OP];
+ int r_got_reply;
+ int r_linger;
+- int r_completed;
+
+ struct ceph_osd_client *r_osdc;
+ struct kref r_kref;
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1522,6 +1522,8 @@ static void handle_reply(struct ceph_osd
+ for (i = 0; i < numops; i++)
+ req->r_reply_op_result[i] = ceph_decode_32(&p);
+
++ already_completed = req->r_got_reply;
++
+ if (!req->r_got_reply) {
+
+ req->r_result = result;
+@@ -1552,16 +1554,14 @@ static void handle_reply(struct ceph_osd
+ ((flags & CEPH_OSD_FLAG_WRITE) == 0))
+ __unregister_request(osdc, req);
+
+- already_completed = req->r_completed;
+- req->r_completed = 1;
+ mutex_unlock(&osdc->request_mutex);
+- if (already_completed)
+- goto done;
+
+- if (req->r_callback)
+- req->r_callback(req, msg);
+- else
+- complete_all(&req->r_completion);
++ if (!already_completed) {
++ if (req->r_callback)
++ req->r_callback(req, msg);
++ else
++ complete_all(&req->r_completion);
++ }
+
+ if (flags & CEPH_OSD_FLAG_ONDISK)
+ complete_request(req);
+@@ -2121,7 +2121,6 @@ int ceph_osdc_start_request(struct ceph_
+ __register_request(osdc, req);
+ req->r_sent = 0;
+ req->r_got_reply = 0;
+- req->r_completed = 0;
+ rc = __map_request(osdc, req, 0);
+ if (rc < 0) {
+ if (nofail) {
--- /dev/null
+From ccca4e37b1a912da3db68aee826557ea66145273 Mon Sep 17 00:00:00 2001
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+Date: Sun, 2 Jun 2013 18:40:23 +0800
+Subject: libceph: fix truncate size calculation
+
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+
+commit ccca4e37b1a912da3db68aee826557ea66145273 upstream.
+
+Check the "not truncated yet" case (truncate_seq == 1, truncate_size == -1ULL).
+
+Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -733,12 +733,14 @@ struct ceph_osd_request *ceph_osdc_new_r
+
+ object_size = le32_to_cpu(layout->fl_object_size);
+ object_base = off - objoff;
+- if (truncate_size <= object_base) {
+- truncate_size = 0;
+- } else {
+- truncate_size -= object_base;
+- if (truncate_size > object_size)
+- truncate_size = object_size;
++ if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
++ if (truncate_size <= object_base) {
++ truncate_size = 0;
++ } else {
++ truncate_size -= object_base;
++ if (truncate_size > object_size)
++ truncate_size = object_size;
++ }
+ }
+
+ osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
--- /dev/null
+From b72e19b9225d4297a18715b0998093d843d170fa Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Thu, 15 Aug 2013 08:52:48 +0300
+Subject: libceph: potential NULL dereference in ceph_osdc_handle_map()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit b72e19b9225d4297a18715b0998093d843d170fa upstream.
+
+There are two places where we read "nr_maps". If both of them are set to
+zero then we would hit a NULL dereference here.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/osd_client.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1786,6 +1786,8 @@ void ceph_osdc_handle_map(struct ceph_os
+ nr_maps--;
+ }
+
++ if (!osdc->osdmap)
++ goto bad;
+ done:
+ downgrade_write(&osdc->map_sem);
+ ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
--- /dev/null
+From 9abc59908e0c5f983aaa91150da32d5b62cf60b7 Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Thu, 29 Aug 2013 17:31:03 -0700
+Subject: rbd: complete notifies before cleaning up osd_client and rbd_dev
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit 9abc59908e0c5f983aaa91150da32d5b62cf60b7 upstream.
+
+To ensure rbd_dev is not used after it's released, flush all pending
+notify callbacks before calling rbd_dev_image_release(). No new
+notifies can be added to the queue at this point because the watch has
+already been unregistered with the osd_client.
+
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -5150,6 +5150,13 @@ static ssize_t rbd_remove(struct bus_typ
+ ret = rbd_dev_header_watch_sync(rbd_dev, false);
+ if (ret)
+ rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret);
++
++ /*
++ * flush remaining watch callbacks - these must be complete
++ * before the osd_client is shutdown
++ */
++ dout("%s: flushing notifies", __func__);
++ ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
+ rbd_dev_image_release(rbd_dev);
+ module_put(THIS_MODULE);
+ ret = count;
--- /dev/null
+From e976cad0f0dbe5440a4ca38e29e1f932d9319125 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Sun, 9 Jun 2013 08:40:39 -0700
+Subject: rbd: fix a couple warnings
+
+From: Sage Weil <sage@inktank.com>
+
+commit e976cad0f0dbe5440a4ca38e29e1f932d9319125 upstream.
+
+gcc isn't quite smart enough and generates these warnings:
+
+drivers/block/rbd.c: In function 'rbd_img_request_fill':
+drivers/block/rbd.c:1266:22: warning: 'bio_list' may be used uninitialized in this function [-Wmaybe-uninitialized]
+drivers/block/rbd.c:2186:14: note: 'bio_list' was declared here
+drivers/block/rbd.c:2247:10: warning: 'pages' may be used uninitialized in this function [-Wmaybe-uninitialized]
+
+even though they are initialized for their respective code paths.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -2173,9 +2173,9 @@ static int rbd_img_request_fill(struct r
+ struct rbd_obj_request *obj_request = NULL;
+ struct rbd_obj_request *next_obj_request;
+ bool write_request = img_request_write_test(img_request);
+- struct bio *bio_list;
++ struct bio *bio_list = 0;
+ unsigned int bio_offset = 0;
+- struct page **pages;
++ struct page **pages = 0;
+ u64 img_offset;
+ u64 resid;
+ u16 opcode;
--- /dev/null
+From 03507db631c94a48e316c7f638ffb2991544d617 Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Tue, 27 Aug 2013 14:45:46 -0700
+Subject: rbd: fix buffer size for writes to images with snapshots
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit 03507db631c94a48e316c7f638ffb2991544d617 upstream.
+
+rbd_osd_req_create() needs to know the snapshot context size to create
+a buffer large enough to send it with the message front. It gets this
+from the img_request, which was not set for the obj_request yet. This
+resulted in trying to write past the end of the front payload, hitting
+this BUG:
+
+libceph: BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
+
+Fix this by associating the obj_request with its img_request
+immediately after it's created, before the osd request is created.
+
+Fixes: http://tracker.ceph.com/issues/5760
+Suggested-by: Alex Elder <alex.elder@linaro.org>
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Alex Elder <alex.elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -2213,6 +2213,11 @@ static int rbd_img_request_fill(struct r
+ rbd_segment_name_free(object_name);
+ if (!obj_request)
+ goto out_unwind;
++ /*
++ * set obj_request->img_request before creating the
++ * osd_request so that it gets the right snapc
++ */
++ rbd_img_obj_request_add(img_request, obj_request);
+
+ if (type == OBJ_REQUEST_BIO) {
+ unsigned int clone_size;
+@@ -2254,11 +2259,6 @@ static int rbd_img_request_fill(struct r
+ obj_request->pages, length,
+ offset & ~PAGE_MASK, false, false);
+
+- /*
+- * set obj_request->img_request before formatting
+- * the osd_request so that it gets the right snapc
+- */
+- rbd_img_obj_request_add(img_request, obj_request);
+ if (write_request)
+ rbd_osd_req_format_write(obj_request);
+ else
--- /dev/null
+From da6a6b63978d45f9ae582d1f362f182012da3a22 Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Wed, 4 Sep 2013 17:57:31 -0700
+Subject: rbd: fix error handling from rbd_snap_name()
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit da6a6b63978d45f9ae582d1f362f182012da3a22 upstream.
+
+rbd_snap_name() calls rbd_dev_v{1,2}_snap_name() depending on the
+format of the image. The format 1 version returns NULL on error, which
+is handled by the caller. The format 2 version returns an ERR_PTR,
+which the caller of rbd_snap_name() does not expect.
+
+Fortunately this is unlikely to occur in practice because
+rbd_snap_id_by_name() is called before rbd_snap_name(). This would hit
+similar errors to rbd_snap_name() (like the snapshot not existing) and
+return early, so rbd_snap_name() would not hit an error unless the
+snapshot was removed between the two calls or memory was exhausted.
+
+Use an ERR_PTR in rbd_dev_v1_snap_name() so that the specific error
+can be propagated, and it is consistent with rbd_dev_v2_snap_name().
+Handle the ERR_PTR in the only rbd_snap_name() caller.
+
+Suggested-by: Alex Elder <alex.elder@linaro.org>
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -937,12 +937,14 @@ static const char *rbd_dev_v1_snap_name(
+ u64 snap_id)
+ {
+ u32 which;
++ const char *snap_name;
+
+ which = rbd_dev_snap_index(rbd_dev, snap_id);
+ if (which == BAD_SNAP_INDEX)
+- return NULL;
++ return ERR_PTR(-ENOENT);
+
+- return _rbd_dev_v1_snap_name(rbd_dev, which);
++ snap_name = _rbd_dev_v1_snap_name(rbd_dev, which);
++ return snap_name ? snap_name : ERR_PTR(-ENOMEM);
+ }
+
+ static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
+@@ -4140,8 +4142,8 @@ static int rbd_dev_spec_update(struct rb
+ /* Look up the snapshot name, and make a copy */
+
+ snap_name = rbd_snap_name(rbd_dev, spec->snap_id);
+- if (!snap_name) {
+- ret = -ENOMEM;
++ if (IS_ERR(snap_name)) {
++ ret = PTR_ERR(snap_name);
+ goto out_err;
+ }
+
--- /dev/null
+From c35455791c1131e7ccbf56ea6fbdd562401c2ce2 Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Wed, 28 Aug 2013 17:08:10 -0700
+Subject: rbd: fix null dereference in dout
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit c35455791c1131e7ccbf56ea6fbdd562401c2ce2 upstream.
+
+The order parameter is sometimes NULL in _rbd_dev_v2_snap_size(), but
+the dout() always dereferences it. Move this to another dout() protected
+by a check that order is non-NULL.
+
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <alex.elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -3712,12 +3712,14 @@ static int _rbd_dev_v2_snap_size(struct
+ if (ret < sizeof (size_buf))
+ return -ERANGE;
+
+- if (order)
++ if (order) {
+ *order = size_buf.order;
++ dout(" order %u", (unsigned int)*order);
++ }
+ *snap_size = le64_to_cpu(size_buf.size);
+
+- dout(" snap_id 0x%016llx order = %u, snap_size = %llu\n",
+- (unsigned long long)snap_id, (unsigned int)*order,
++ dout(" snap_id 0x%016llx snap_size = %llu\n",
++ (unsigned long long)snap_id,
+ (unsigned long long)*snap_size);
+
+ return 0;
--- /dev/null
+From 9875201e10496612080e7d164acc8f625c18725c Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Thu, 29 Aug 2013 17:26:31 -0700
+Subject: rbd: fix use-after free of rbd_dev->disk
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit 9875201e10496612080e7d164acc8f625c18725c upstream.
+
+Removing a device deallocates the disk, unschedules the watch, and
+finally cleans up the rbd_dev structure. rbd_dev_refresh(), called
+from the watch callback, updates the disk size and rbd_dev
+structure. With no locking between them, rbd_dev_refresh() may use the
+device or rbd_dev after they've been freed.
+
+To fix this, check whether RBD_DEV_FLAG_REMOVING is set before
+updating the disk size in rbd_dev_refresh(). In order to prevent a
+race where rbd_dev_refresh() is already revalidating the disk when
+rbd_remove() is called, move the call to rbd_bus_del_dev() after the
+watch is unregistered and all notifies are complete. It's safe to
+defer deleting this structure because no new requests can be submitted
+once the RBD_DEV_FLAG_REMOVING is set, since the device cannot be
+opened.
+
+Fixes: http://tracker.ceph.com/issues/5636
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c | 40 +++++++++++++++++++++++++++++++++-------
+ 1 file changed, 33 insertions(+), 7 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -3336,6 +3336,31 @@ static void rbd_exists_validate(struct r
+ clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+ }
+
++static void rbd_dev_update_size(struct rbd_device *rbd_dev)
++{
++ sector_t size;
++ bool removing;
++
++ /*
++ * Don't hold the lock while doing disk operations,
++ * or lock ordering will conflict with the bdev mutex via:
++ * rbd_add() -> blkdev_get() -> rbd_open()
++ */
++ spin_lock_irq(&rbd_dev->lock);
++ removing = test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
++ spin_unlock_irq(&rbd_dev->lock);
++ /*
++ * If the device is being removed, rbd_dev->disk has
++ * been destroyed, so don't try to update its size
++ */
++ if (!removing) {
++ size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
++ dout("setting size to %llu sectors", (unsigned long long)size);
++ set_capacity(rbd_dev->disk, size);
++ revalidate_disk(rbd_dev->disk);
++ }
++}
++
+ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+ {
+ u64 mapping_size;
+@@ -3354,12 +3379,7 @@ static int rbd_dev_refresh(struct rbd_de
+ rbd_exists_validate(rbd_dev);
+ mutex_unlock(&ctl_mutex);
+ if (mapping_size != rbd_dev->mapping.size) {
+- sector_t size;
+-
+- size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
+- dout("setting size to %llu sectors", (unsigned long long)size);
+- set_capacity(rbd_dev->disk, size);
+- revalidate_disk(rbd_dev->disk);
++ rbd_dev_update_size(rbd_dev);
+ }
+
+ return ret;
+@@ -5147,7 +5167,6 @@ static ssize_t rbd_remove(struct bus_typ
+ if (ret < 0 || already)
+ goto done;
+
+- rbd_bus_del_dev(rbd_dev);
+ ret = rbd_dev_header_watch_sync(rbd_dev, false);
+ if (ret)
+ rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret);
+@@ -5158,6 +5177,13 @@ static ssize_t rbd_remove(struct bus_typ
+ */
+ dout("%s: flushing notifies", __func__);
+ ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
++ /*
++ * Don't free anything from rbd_dev->disk until after all
++ * notifies are completely processed. Otherwise
++ * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting
++ * in a potential use after free of rbd_dev->disk or rbd_dev.
++ */
++ rbd_bus_del_dev(rbd_dev);
+ rbd_dev_image_release(rbd_dev);
+ module_put(THIS_MODULE);
+ ret = count;
--- /dev/null
+From e215605417b87732c6debf65da6d953016a1e5bc Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 22 May 2013 20:54:25 -0500
+Subject: rbd: flush dcache after zeroing page data
+
+From: Alex Elder <elder@inktank.com>
+
+commit e215605417b87732c6debf65da6d953016a1e5bc upstream.
+
+Neither zero_bio_chain() nor zero_pages() contains a call to flush
+caches after zeroing a portion of a page. This can cause problems
+on architectures that have caches that allow virtual address
+aliasing.
+
+This resolves:
+ http://tracker.ceph.com/issues/4777
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -1126,6 +1126,7 @@ static void zero_bio_chain(struct bio *c
+ buf = bvec_kmap_irq(bv, &flags);
+ memset(buf + remainder, 0,
+ bv->bv_len - remainder);
++ flush_dcache_page(bv->bv_page);
+ bvec_kunmap_irq(buf, &flags);
+ }
+ pos += bv->bv_len;
+@@ -1158,6 +1159,7 @@ static void zero_pages(struct page **pag
+ local_irq_save(flags);
+ kaddr = kmap_atomic(*page);
+ memset(kaddr + page_offset, 0, length);
++ flush_dcache_page(*page);
+ kunmap_atomic(kaddr);
+ local_irq_restore(flags);
+
--- /dev/null
+From efadc98aab674153709cc357ba565f04e3164fcd Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Thu, 29 Aug 2013 19:16:42 -0700
+Subject: rbd: ignore unmapped snapshots that no longer exist
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit efadc98aab674153709cc357ba565f04e3164fcd upstream.
+
+This prevents erroring out while adding a device when a snapshot
+unrelated to the current mapping is deleted between reading the
+snapshot context and reading the snapshot names. If the mapped
+snapshot name is not found an error still occurs as usual.
+
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -4055,8 +4055,13 @@ static u64 rbd_v2_snap_id_by_name(struct
+
+ snap_id = snapc->snaps[which];
+ snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id);
+- if (IS_ERR(snap_name))
+- break;
++ if (IS_ERR(snap_name)) {
++ /* ignore no-longer existing snapshots */
++ if (PTR_ERR(snap_name) == -ENOENT)
++ continue;
++ else
++ break;
++ }
+ found = !strcmp(name, snap_name);
+ kfree(snap_name);
+ }
--- /dev/null
+From 20e0af67ce88c657d0601977b9941a2256afbdaa Mon Sep 17 00:00:00 2001
+From: Josh Durgin <josh.durgin@inktank.com>
+Date: Thu, 29 Aug 2013 17:36:03 -0700
+Subject: rbd: make rbd_obj_notify_ack() synchronous
+
+From: Josh Durgin <josh.durgin@inktank.com>
+
+commit 20e0af67ce88c657d0601977b9941a2256afbdaa upstream.
+
+The only user of rbd_obj_notify_ack() is rbd_watch_cb(). It is used
+asynchronously with no tracking of when the notify ack completes, so
+it may still be in progress when the osd_client is shut down. This
+results in a BUG() since the osd client assumes no requests are in
+flight when it stops. Since all notifies are flushed before the
+osd_client is stopped, waiting for the notify ack to complete before
+returning from the watch callback ensures there are no notify acks in
+flight during shutdown.
+
+Rename rbd_obj_notify_ack() to rbd_obj_notify_ack_sync() to reflect
+its new synchronous nature.
+
+Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+Reviewed-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -2819,7 +2819,7 @@ out_err:
+ obj_request_done_set(obj_request);
+ }
+
+-static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id)
++static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id)
+ {
+ struct rbd_obj_request *obj_request;
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+@@ -2834,16 +2834,17 @@ static int rbd_obj_notify_ack(struct rbd
+ obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request);
+ if (!obj_request->osd_req)
+ goto out;
+- obj_request->callback = rbd_obj_request_put;
+
+ osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK,
+ notify_id, 0, 0);
+ rbd_osd_req_format_read(obj_request);
+
+ ret = rbd_obj_request_submit(osdc, obj_request);
+-out:
+ if (ret)
+- rbd_obj_request_put(obj_request);
++ goto out;
++ ret = rbd_obj_request_wait(obj_request);
++out:
++ rbd_obj_request_put(obj_request);
+
+ return ret;
+ }
+@@ -2863,7 +2864,7 @@ static void rbd_watch_cb(u64 ver, u64 no
+ if (ret)
+ rbd_warn(rbd_dev, ": header refresh error (%d)\n", ret);
+
+- rbd_obj_notify_ack(rbd_dev, notify_id);
++ rbd_obj_notify_ack_sync(rbd_dev, notify_id);
+ }
+
+ /*
--- /dev/null
+From 82a442d239695a242c4d584464c9606322cd02aa Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Fri, 31 May 2013 17:40:44 -0500
+Subject: rbd: protect against concurrent unmaps
+
+From: Alex Elder <elder@inktank.com>
+
+commit 82a442d239695a242c4d584464c9606322cd02aa upstream.
+
+Make sure two concurrent unmap operations on the same rbd device
+won't collide, by only proceeding with the removal and cleanup of a
+device if it is not already underway.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -5108,6 +5108,7 @@ static ssize_t rbd_remove(struct bus_typ
+ struct list_head *tmp;
+ int dev_id;
+ unsigned long ul;
++ bool already = false;
+ int ret;
+
+ ret = strict_strtoul(buf, 10, &ul);
+@@ -5135,11 +5136,12 @@ static ssize_t rbd_remove(struct bus_typ
+ if (rbd_dev->open_count)
+ ret = -EBUSY;
+ else
+- set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
++ already = test_and_set_bit(RBD_DEV_FLAG_REMOVING,
++ &rbd_dev->flags);
+ spin_unlock_irq(&rbd_dev->lock);
+ }
+ spin_unlock(&rbd_dev_list_lock);
+- if (ret < 0)
++ if (ret < 0 || already)
+ goto done;
+
+ rbd_bus_del_dev(rbd_dev);
--- /dev/null
+From 751cc0e3cfabdda87c4c21519253c6751e97a8d4 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Fri, 31 May 2013 15:17:01 -0500
+Subject: rbd: set removing flag while holding list lock
+
+From: Alex Elder <elder@inktank.com>
+
+commit 751cc0e3cfabdda87c4c21519253c6751e97a8d4 upstream.
+
+When unmapping a device, its id is supplied, and that is used to
+look up which rbd device should be unmapped. Looking up the
+device involves searching the rbd device list while holding
+a spinlock that protects access to that list.
+
+Currently all of this is done under protection of the control lock,
+but that protection is going away soon. To ensure the rbd_dev is
+still valid (still on the list) while setting its REMOVING flag, do
+so while still holding the list lock. To do so, get rid of
+__rbd_get_dev(), and open code what it did in the one place it
+was used.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c | 53 +++++++++++++++++++++-------------------------------
+ 1 file changed, 22 insertions(+), 31 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -5061,23 +5061,6 @@ err_out_module:
+ return (ssize_t)rc;
+ }
+
+-static struct rbd_device *__rbd_get_dev(unsigned long dev_id)
+-{
+- struct list_head *tmp;
+- struct rbd_device *rbd_dev;
+-
+- spin_lock(&rbd_dev_list_lock);
+- list_for_each(tmp, &rbd_dev_list) {
+- rbd_dev = list_entry(tmp, struct rbd_device, node);
+- if (rbd_dev->dev_id == dev_id) {
+- spin_unlock(&rbd_dev_list_lock);
+- return rbd_dev;
+- }
+- }
+- spin_unlock(&rbd_dev_list_lock);
+- return NULL;
+-}
+-
+ static void rbd_dev_device_release(struct device *dev)
+ {
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+@@ -5122,7 +5105,8 @@ static ssize_t rbd_remove(struct bus_typ
+ size_t count)
+ {
+ struct rbd_device *rbd_dev = NULL;
+- int target_id;
++ struct list_head *tmp;
++ int dev_id;
+ unsigned long ul;
+ int ret;
+
+@@ -5131,26 +5115,33 @@ static ssize_t rbd_remove(struct bus_typ
+ return ret;
+
+ /* convert to int; abort if we lost anything in the conversion */
+- target_id = (int) ul;
+- if (target_id != ul)
++ dev_id = (int)ul;
++ if (dev_id != ul)
+ return -EINVAL;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+- rbd_dev = __rbd_get_dev(target_id);
+- if (!rbd_dev) {
+- ret = -ENOENT;
+- goto done;
++ ret = -ENOENT;
++ spin_lock(&rbd_dev_list_lock);
++ list_for_each(tmp, &rbd_dev_list) {
++ rbd_dev = list_entry(tmp, struct rbd_device, node);
++ if (rbd_dev->dev_id == dev_id) {
++ ret = 0;
++ break;
++ }
+ }
+-
+- spin_lock_irq(&rbd_dev->lock);
+- if (rbd_dev->open_count)
+- ret = -EBUSY;
+- else
+- set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
+- spin_unlock_irq(&rbd_dev->lock);
++ if (!ret) {
++ spin_lock_irq(&rbd_dev->lock);
++ if (rbd_dev->open_count)
++ ret = -EBUSY;
++ else
++ set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
++ spin_unlock_irq(&rbd_dev->lock);
++ }
++ spin_unlock(&rbd_dev_list_lock);
+ if (ret < 0)
+ goto done;
++
+ rbd_bus_del_dev(rbd_dev);
+ ret = rbd_dev_header_watch_sync(rbd_dev, false);
+ if (ret)
selinux-look-for-ipsec-labels-on-both-inbound-and-outbound-packets.patch
selinux-process-labeled-ipsec-tcp-syn-ack-packets-properly-in-selinux_ip_postroute.patch
clocksource-dw_apb_timer_of-fix-read_sched_clock.patch
+ceph-improve-error-handling-in-ceph_mdsmap_decode.patch
+libceph-add-lingering-request-reference-when-registered.patch
+rbd-flush-dcache-after-zeroing-page-data.patch
+rbd-set-removing-flag-while-holding-list-lock.patch
+rbd-protect-against-concurrent-unmaps.patch
+libceph-fix-safe-completion.patch
+libceph-fix-truncate-size-calculation.patch
+rbd-fix-a-couple-warnings.patch
+ceph-free-mdsc-if-alloc-mdsc-mdsmap-failed.patch
+ceph-avoid-accessing-invalid-memory.patch
+libceph-call-r_unsafe_callback-when-unsafe-reply-is-received.patch
+ceph-fix-null-pointer-dereference.patch
+ceph-cleanup-types-in-striped_read.patch
+ceph-add-check-returned-value-on-func-ceph_calc_ceph_pg.patch
+libceph-fix-error-handling-in-handle_reply.patch
+libceph-potential-null-dereference-in-ceph_osdc_handle_map.patch
+libceph-create_singlethread_workqueue-doesn-t-return-err_ptrs.patch
+ceph-fix-bugs-about-handling-short-read-for-sync-read-mode.patch
+ceph-allow-sync_read-write-return-partial-successed-size-of-read-write.patch
+rbd-fix-buffer-size-for-writes-to-images-with-snapshots.patch
+rbd-fix-null-dereference-in-dout.patch
+libceph-add-function-to-ensure-notifies-are-complete.patch
+rbd-complete-notifies-before-cleaning-up-osd_client-and-rbd_dev.patch
+rbd-make-rbd_obj_notify_ack-synchronous.patch
+rbd-fix-use-after-free-of-rbd_dev-disk.patch
+rbd-ignore-unmapped-snapshots-that-no-longer-exist.patch
+rbd-fix-error-handling-from-rbd_snap_name.patch