--- /dev/null
+From 59e72ea89b6ed9d4dedd6bd25dbebef20c4e8e31 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Wed, 27 Jun 2012 12:24:08 -0700
+Subject: libceph: set peer name on con_open, not init
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit b7a9e5dd40f17a48a72f249b8bbc989b63bae5fd)
+
+The peer name may change on each open attempt, even when the connection is
+reused.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/mds_client.c | 7 ++++---
+ include/linux/ceph/messenger.h | 4 ++--
+ net/ceph/messenger.c | 12 +++++++-----
+ net/ceph/mon_client.c | 4 ++--
+ net/ceph/osd_client.c | 10 ++++++----
+ 5 files changed, 21 insertions(+), 16 deletions(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -394,8 +394,7 @@ static struct ceph_mds_session *register
+ s->s_seq = 0;
+ mutex_init(&s->s_mutex);
+
+- ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr,
+- CEPH_ENTITY_TYPE_MDS, mds);
++ ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
+
+ spin_lock_init(&s->s_gen_ttl_lock);
+ s->s_cap_gen = 0;
+@@ -437,7 +436,8 @@ static struct ceph_mds_session *register
+ mdsc->sessions[mds] = s;
+ atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */
+
+- ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
++ ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
++ ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
+
+ return s;
+
+@@ -2529,6 +2529,7 @@ static void send_mds_reconnect(struct ce
+ session->s_seq = 0;
+
+ ceph_con_open(&session->s_con,
++ CEPH_ENTITY_TYPE_MDS, mds,
+ ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
+
+ /* replay unsafe requests */
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -224,9 +224,9 @@ extern void ceph_messenger_init(struct c
+
+ extern void ceph_con_init(struct ceph_connection *con, void *private,
+ const struct ceph_connection_operations *ops,
+- struct ceph_messenger *msgr, __u8 entity_type,
+- __u64 entity_num);
++ struct ceph_messenger *msgr);
+ extern void ceph_con_open(struct ceph_connection *con,
++ __u8 entity_type, __u64 entity_num,
+ struct ceph_entity_addr *addr);
+ extern bool ceph_con_opened(struct ceph_connection *con);
+ extern void ceph_con_close(struct ceph_connection *con);
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -523,12 +523,17 @@ EXPORT_SYMBOL(ceph_con_close);
+ /*
+ * Reopen a closed connection, with a new peer address.
+ */
+-void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
++void ceph_con_open(struct ceph_connection *con,
++ __u8 entity_type, __u64 entity_num,
++ struct ceph_entity_addr *addr)
+ {
+ dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
+ set_bit(OPENING, &con->state);
+ WARN_ON(!test_and_clear_bit(CLOSED, &con->state));
+
++ con->peer_name.type = (__u8) entity_type;
++ con->peer_name.num = cpu_to_le64(entity_num);
++
+ memcpy(&con->peer_addr, addr, sizeof(*addr));
+ con->delay = 0; /* reset backoff memory */
+ queue_con(con);
+@@ -548,7 +553,7 @@ bool ceph_con_opened(struct ceph_connect
+ */
+ void ceph_con_init(struct ceph_connection *con, void *private,
+ const struct ceph_connection_operations *ops,
+- struct ceph_messenger *msgr, __u8 entity_type, __u64 entity_num)
++ struct ceph_messenger *msgr)
+ {
+ dout("con_init %p\n", con);
+ memset(con, 0, sizeof(*con));
+@@ -558,9 +563,6 @@ void ceph_con_init(struct ceph_connectio
+
+ con_sock_state_init(con);
+
+- con->peer_name.type = (__u8) entity_type;
+- con->peer_name.num = cpu_to_le64(entity_num);
+-
+ mutex_init(&con->mutex);
+ INIT_LIST_HEAD(&con->out_queue);
+ INIT_LIST_HEAD(&con->out_sent);
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -143,11 +143,11 @@ static int __open_session(struct ceph_mo
+ monc->want_next_osdmap = !!monc->want_next_osdmap;
+
+ ceph_con_init(&monc->con, monc, &mon_con_ops,
+- &monc->client->msgr,
+- CEPH_ENTITY_TYPE_MON, monc->cur_mon);
++ &monc->client->msgr);
+
+ dout("open_session mon%d opening\n", monc->cur_mon);
+ ceph_con_open(&monc->con,
++ CEPH_ENTITY_TYPE_MON, monc->cur_mon,
+ &monc->monmap->mon_inst[monc->cur_mon].addr);
+
+ /* initiatiate authentication handshake */
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -639,8 +639,7 @@ static struct ceph_osd *create_osd(struc
+ INIT_LIST_HEAD(&osd->o_osd_lru);
+ osd->o_incarnation = 1;
+
+- ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr,
+- CEPH_ENTITY_TYPE_OSD, onum);
++ ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
+
+ INIT_LIST_HEAD(&osd->o_keepalive_item);
+ return osd;
+@@ -750,7 +749,8 @@ static int __reset_osd(struct ceph_osd_c
+ ret = -EAGAIN;
+ } else {
+ ceph_con_close(&osd->o_con);
+- ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
++ ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd,
++ &osdc->osdmap->osd_addr[osd->o_osd]);
+ osd->o_incarnation++;
+ }
+ return ret;
+@@ -1005,7 +1005,9 @@ static int __map_request(struct ceph_osd
+ dout("map_request osd %p is osd%d\n", req->r_osd, o);
+ __insert_osd(osdc, req->r_osd);
+
+- ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]);
++ ceph_con_open(&req->r_osd->o_con,
++ CEPH_ENTITY_TYPE_OSD, o,
++ &osdc->osdmap->osd_addr[o]);
+ }
+
+ if (req->r_osd) {
--- /dev/null
+From 898a7dce3c962d721f2b00e64ea4e9f029f46455 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Wed, 27 Jun 2012 12:24:34 -0700
+Subject: libceph: initialize mon_client con only once
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 735a72ef952d42a256f79ae3e6dc1c17a45c041b)
+
+Do not re-initialize the con on every connection attempt. When we
+ceph_con_close, there may still be work queued on the socket (e.g., to
+close it), and re-initializing will clobber the work_struct state.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/mon_client.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -119,7 +119,6 @@ static void __close_session(struct ceph_
+ dout("__close_session closing mon%d\n", monc->cur_mon);
+ ceph_msg_revoke(monc->m_auth);
+ ceph_con_close(&monc->con);
+- monc->con.private = NULL;
+ monc->cur_mon = -1;
+ monc->pending_auth = 0;
+ ceph_auth_reset(monc->auth);
+@@ -142,9 +141,6 @@ static int __open_session(struct ceph_mo
+ monc->sub_renew_after = jiffies; /* i.e., expired */
+ monc->want_next_osdmap = !!monc->want_next_osdmap;
+
+- ceph_con_init(&monc->con, monc, &mon_con_ops,
+- &monc->client->msgr);
+-
+ dout("open_session mon%d opening\n", monc->cur_mon);
+ ceph_con_open(&monc->con,
+ CEPH_ENTITY_TYPE_MON, monc->cur_mon,
+@@ -798,6 +794,9 @@ int ceph_monc_init(struct ceph_mon_clien
+ if (!monc->m_auth)
+ goto out_auth_reply;
+
++ ceph_con_init(&monc->con, monc, &mon_con_ops,
++ &monc->client->msgr);
++
+ monc->cur_mon = -1;
+ monc->hunting = true;
+ monc->sub_renew_after = jiffies;
--- /dev/null
+From 4d2e598744fd587f18aaf5c28525e38ce57011b2 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Wed, 27 Jun 2012 12:31:02 -0700
+Subject: libceph: allow sock transition from CONNECTING to CLOSED
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit fbb85a478f6d4cce6942f1c25c6a68ec5b1e7e7f)
+
+It is possible to close a socket that is in the OPENING state. For
+example, it can happen if ceph_con_close() is called on the con before
+the TCP connection is established. con_work() will come around and shut
+down the socket.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 25 +++++++++++++------------
+ 1 file changed, 13 insertions(+), 12 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -48,17 +48,17 @@
+ * | ----------------------
+ * | \
+ * + con_sock_state_closed() \
+- * |\ \
+- * | \ \
+- * | ----------- \
+- * | | CLOSING | socket event; \
+- * | ----------- await close \
+- * | ^ |
+- * | | |
+- * | + con_sock_state_closing() |
+- * | / \ |
+- * | / --------------- |
+- * | / \ v
++ * |+--------------------------- \
++ * | \ \ \
++ * | ----------- \ \
++ * | | CLOSING | socket event; \ \
++ * | ----------- await close \ \
++ * | ^ \ |
++ * | | \ |
++ * | + con_sock_state_closing() \ |
++ * | / \ | |
++ * | / --------------- | |
++ * | / \ v v
+ * | / --------------
+ * | / -----------------| CONNECTING | socket created, TCP
+ * | | / -------------- connect initiated
+@@ -241,7 +241,8 @@ static void con_sock_state_closed(struct
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
+- old_state != CON_SOCK_STATE_CLOSING))
++ old_state != CON_SOCK_STATE_CLOSING &&
++ old_state != CON_SOCK_STATE_CONNECTING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ }
+
--- /dev/null
+From d6d147a147a1fa71c5419f5e6e17a094b61d8726 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 9 Jul 2012 14:22:34 -0700
+Subject: libceph: initialize msgpool message types
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit d50b409fb8698571d8209e5adfe122e287e31290)
+
+Initialize the type field for messages in a msgpool. The caller was doing
+this for osd ops, but not for the reply messages.
+
+Reported-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/msgpool.h | 3 ++-
+ net/ceph/msgpool.c | 7 ++++---
+ net/ceph/osd_client.c | 7 ++++---
+ 3 files changed, 10 insertions(+), 7 deletions(-)
+
+--- a/include/linux/ceph/msgpool.h
++++ b/include/linux/ceph/msgpool.h
+@@ -11,10 +11,11 @@
+ struct ceph_msgpool {
+ const char *name;
+ mempool_t *pool;
++ int type; /* preallocated message type */
+ int front_len; /* preallocated payload size */
+ };
+
+-extern int ceph_msgpool_init(struct ceph_msgpool *pool,
++extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
+ int front_len, int size, bool blocking,
+ const char *name);
+ extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
+--- a/net/ceph/msgpool.c
++++ b/net/ceph/msgpool.c
+@@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mas
+ struct ceph_msgpool *pool = arg;
+ struct ceph_msg *msg;
+
+- msg = ceph_msg_new(0, pool->front_len, gfp_mask, true);
++ msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
+ if (!msg) {
+ dout("msgpool_alloc %s failed\n", pool->name);
+ } else {
+@@ -32,10 +32,11 @@ static void msgpool_free(void *element,
+ ceph_msg_put(msg);
+ }
+
+-int ceph_msgpool_init(struct ceph_msgpool *pool,
++int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
+ int front_len, int size, bool blocking, const char *name)
+ {
+ dout("msgpool %s init\n", name);
++ pool->type = type;
+ pool->front_len = front_len;
+ pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
+ if (!pool->pool)
+@@ -61,7 +62,7 @@ struct ceph_msg *ceph_msgpool_get(struct
+ WARN_ON(1);
+
+ /* try to alloc a fresh message */
+- return ceph_msg_new(0, front_len, GFP_NOFS, false);
++ return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
+ }
+
+ msg = mempool_alloc(pool->pool, GFP_NOFS);
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -242,6 +242,7 @@ struct ceph_osd_request *ceph_osdc_alloc
+ }
+ ceph_pagelist_init(req->r_trail);
+ }
++
+ /* create request message; allow space for oid */
+ msg_size += MAX_OBJ_NAME_SIZE;
+ if (snapc)
+@@ -255,7 +256,6 @@ struct ceph_osd_request *ceph_osdc_alloc
+ return NULL;
+ }
+
+- msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP);
+ memset(msg->front.iov_base, 0, msg->front.iov_len);
+
+ req->r_request = msg;
+@@ -1837,11 +1837,12 @@ int ceph_osdc_init(struct ceph_osd_clien
+ if (!osdc->req_mempool)
+ goto out;
+
+- err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true,
++ err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
++ OSD_OP_FRONT_LEN, 10, true,
+ "osd_op");
+ if (err < 0)
+ goto out_mempool;
+- err = ceph_msgpool_init(&osdc->msgpool_op_reply,
++ err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
+ OSD_OPREPLY_FRONT_LEN, 10, true,
+ "osd_op_reply");
+ if (err < 0)
--- /dev/null
+From a59ad918abdb2ec82586dd09979c0c16a7320b3b Mon Sep 17 00:00:00 2001
+From: Guanjun He <gjhe@suse.com>
+Date: Sun, 8 Jul 2012 19:50:33 -0700
+Subject: libceph: prevent the race of incoming work during teardown
+
+From: Guanjun He <gjhe@suse.com>
+
+(cherry picked from commit a2a3258417eb6a1799cf893350771428875a8287)
+
+Add an atomic variable 'stopping' as a flag in struct ceph_messenger.
+Set the flag to 1 in ceph_destroy_client(), and test it at the top of
+ceph_sock_data_ready(): if it is set, return immediately.
+
+Signed-off-by: Guanjun He <gjhe@suse.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h | 1 +
+ net/ceph/ceph_common.c | 2 ++
+ net/ceph/messenger.c | 5 +++++
+ 3 files changed, 8 insertions(+)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -50,6 +50,7 @@ struct ceph_messenger {
+ struct ceph_entity_inst inst; /* my name+address */
+ struct ceph_entity_addr my_enc_addr;
+
++ atomic_t stopping;
+ bool nocrc;
+
+ /*
+--- a/net/ceph/ceph_common.c
++++ b/net/ceph/ceph_common.c
+@@ -495,6 +495,8 @@ void ceph_destroy_client(struct ceph_cli
+ {
+ dout("destroy_client %p\n", client);
+
++ atomic_set(&client->msgr.stopping, 1);
++
+ /* unmount */
+ ceph_osdc_stop(&client->osdc);
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -254,6 +254,9 @@ static void con_sock_state_closed(struct
+ static void ceph_sock_data_ready(struct sock *sk, int count_unused)
+ {
+ struct ceph_connection *con = sk->sk_user_data;
++ if (atomic_read(&con->msgr->stopping)) {
++ return;
++ }
+
+ if (sk->sk_state != TCP_CLOSE_WAIT) {
+ dout("%s on %p state = %lu, queueing work\n", __func__,
+@@ -2413,6 +2416,8 @@ void ceph_messenger_init(struct ceph_mes
+ encode_my_addr(msgr);
+ msgr->nocrc = nocrc;
+
++ atomic_set(&msgr->stopping, 0);
++
+ dout("%s %p\n", __func__, msgr);
+ }
+ EXPORT_SYMBOL(ceph_messenger_init);
--- /dev/null
+From 83e7b2023616d4b029b8c5c5d317041a9cbfe05b Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 30 Jul 2012 16:24:21 -0700
+Subject: libceph: report socket read/write error message
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 3a140a0d5c4b9e35373b016e41dfc85f1e526bdb)
+
+We need to set error_msg to something useful before calling ceph_fault();
+do so here for try_{read,write}(). This is more informative than
+
+libceph: osd0 192.168.106.220:6801 (null)
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2287,14 +2287,18 @@ restart:
+ ret = try_read(con);
+ if (ret == -EAGAIN)
+ goto restart;
+- if (ret < 0)
++ if (ret < 0) {
++ con->error_msg = "socket error on read";
+ goto fault;
++ }
+
+ ret = try_write(con);
+ if (ret == -EAGAIN)
+ goto restart;
+- if (ret < 0)
++ if (ret < 0) {
++ con->error_msg = "socket error on write";
+ goto fault;
++ }
+
+ done:
+ mutex_unlock(&con->mutex);
--- /dev/null
+From a61ddafc8e0b33876e676e2af4e22ebae25d6d63 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 30 Jul 2012 16:24:37 -0700
+Subject: libceph: fix mutex coverage for ceph_con_close
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 8c50c817566dfa4581f82373aac39f3e608a7dc8)
+
+Hold the mutex while twiddling all of the state bits to avoid possible
+races. While we're here, make note of why we cannot close the socket
+directly.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -503,6 +503,7 @@ static void reset_connection(struct ceph
+ */
+ void ceph_con_close(struct ceph_connection *con)
+ {
++ mutex_lock(&con->mutex);
+ dout("con_close %p peer %s\n", con,
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ clear_bit(NEGOTIATING, &con->state);
+@@ -515,11 +516,16 @@ void ceph_con_close(struct ceph_connecti
+ clear_bit(KEEPALIVE_PENDING, &con->flags);
+ clear_bit(WRITE_PENDING, &con->flags);
+
+- mutex_lock(&con->mutex);
+ reset_connection(con);
+ con->peer_global_seq = 0;
+ cancel_delayed_work(&con->work);
+ mutex_unlock(&con->mutex);
++
++ /*
++ * We cannot close the socket directly from here because the
++ * work threads use it without holding the mutex. Instead, let
++ * con_work() do it.
++ */
+ queue_con(con);
+ }
+ EXPORT_SYMBOL(ceph_con_close);
--- /dev/null
+From 2b126f532154d1ec4efd7967ef0d48e041596c39 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 30 Jul 2012 16:19:28 -0700
+Subject: libceph: resubmit linger ops when pg mapping changes
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 6194ea895e447fdf4adfd23f67873a32bf4f15ae)
+
+The linger op registration (i.e., watch) modifies the object state. As
+such, the OSD will reply with success if it has already applied without
+doing the associated side-effects (setting up the watch session state).
+If we lose the ACK and resubmit, we will see success but the watch will not
+be correctly registered and we won't get notifies.
+
+To fix this, always resubmit the linger op with a new tid. We accomplish
+this by re-registering as a linger (i.e., 'registered') if we are not yet
+registered. Then the second loop will treat this just like a normal
+case of re-registering.
+
+This mirrors a similar fix on the userland ceph.git, commit 5dd68b95, and
+ceph bug #2796.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/osd_client.c | 26 +++++++++++++++++++++-----
+ 1 file changed, 21 insertions(+), 5 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -888,7 +888,9 @@ static void __register_linger_request(st
+ {
+ dout("__register_linger_request %p\n", req);
+ list_add_tail(&req->r_linger_item, &osdc->req_linger);
+- list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
++ if (req->r_osd)
++ list_add_tail(&req->r_linger_osd,
++ &req->r_osd->o_linger_requests);
+ }
+
+ static void __unregister_linger_request(struct ceph_osd_client *osdc,
+@@ -1302,8 +1304,9 @@ static void kick_requests(struct ceph_os
+
+ dout("kick_requests %s\n", force_resend ? " (force resend)" : "");
+ mutex_lock(&osdc->request_mutex);
+- for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
++ for (p = rb_first(&osdc->requests); p; ) {
+ req = rb_entry(p, struct ceph_osd_request, r_node);
++ p = rb_next(p);
+ err = __map_request(osdc, req, force_resend);
+ if (err < 0)
+ continue; /* error */
+@@ -1311,10 +1314,23 @@ static void kick_requests(struct ceph_os
+ dout("%p tid %llu maps to no osd\n", req, req->r_tid);
+ needmap++; /* request a newer map */
+ } else if (err > 0) {
+- dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
+- req->r_osd ? req->r_osd->o_osd : -1);
+- if (!req->r_linger)
++ if (!req->r_linger) {
++ dout("%p tid %llu requeued on osd%d\n", req,
++ req->r_tid,
++ req->r_osd ? req->r_osd->o_osd : -1);
+ req->r_flags |= CEPH_OSD_FLAG_RETRY;
++ }
++ }
++ if (req->r_linger && list_empty(&req->r_linger_item)) {
++ /*
++ * register as a linger so that we will
++ * re-submit below and get a new tid
++ */
++ dout("%p tid %llu restart on osd%d\n",
++ req, req->r_tid,
++ req->r_osd ? req->r_osd->o_osd : -1);
++ __register_linger_request(osdc, req);
++ __unregister_request(osdc, req);
+ }
+ }
+
--- /dev/null
+From d6fa6884b2e1e9d5b59ed3f6934b69932f7ee6b8 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 30 Jul 2012 16:20:25 -0700
+Subject: libceph: (re)initialize bio_iter on start of message receive
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit a4107026976f06c9a6ce8cc84a763564ee39d901)
+
+Previously, we were opportunistically initializing the bio_iter if it
+appeared to be uninitialized in the middle of the read path. The problem
+is that a sequence like:
+
+ - start reading message
+ - initialize bio_iter
+ - read half a message
+ - messenger fault, reconnect
+ - restart reading message
+ - ** bio_iter now non-NULL, not reinitialized **
+ - read past end of bio, crash
+
+Instead, initialize the bio_iter unconditionally when we allocate/claim
+the message for read.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1872,6 +1872,11 @@ static int read_partial_message(struct c
+ else
+ con->in_msg_pos.page_pos = 0;
+ con->in_msg_pos.data_pos = 0;
++
++#ifdef CONFIG_BLOCK
++ if (m->bio)
++ init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
++#endif
+ }
+
+ /* front */
+@@ -1888,10 +1893,6 @@ static int read_partial_message(struct c
+ if (ret <= 0)
+ return ret;
+ }
+-#ifdef CONFIG_BLOCK
+- if (m->bio && !m->bio_iter)
+- init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
+-#endif
+
+ /* (page) data */
+ while (con->in_msg_pos.data_pos < data_len) {
+@@ -1902,7 +1903,7 @@ static int read_partial_message(struct c
+ return ret;
+ #ifdef CONFIG_BLOCK
+ } else if (m->bio) {
+-
++ BUG_ON(!m->bio_iter);
+ ret = read_partial_message_bio(con,
+ &m->bio_iter, &m->bio_seg,
+ data_len, do_datacrc);
--- /dev/null
+From 6135073710e680bdb729a0bfcc147d5e45751b03 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 30 Jul 2012 16:21:40 -0700
+Subject: libceph: protect ceph_con_open() with mutex
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 5469155f2bc83bb2c88b0a0370c3d54d87eed06e)
+
+Take the con mutex while we are initiating a ceph open. This is necessary
+because the con may have previously been in use and then closed, which
+could result in a racing workqueue running con_work().
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -537,6 +537,7 @@ void ceph_con_open(struct ceph_connectio
+ __u8 entity_type, __u64 entity_num,
+ struct ceph_entity_addr *addr)
+ {
++ mutex_lock(&con->mutex);
+ dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
+ set_bit(OPENING, &con->state);
+ WARN_ON(!test_and_clear_bit(CLOSED, &con->state));
+@@ -546,6 +547,7 @@ void ceph_con_open(struct ceph_connectio
+
+ memcpy(&con->peer_addr, addr, sizeof(*addr));
+ con->delay = 0; /* reset backoff memory */
++ mutex_unlock(&con->mutex);
+ queue_con(con);
+ }
+ EXPORT_SYMBOL(ceph_con_open);
--- /dev/null
+From e2cccd21c9f226de2252f56c37dd09807e544656 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 30 Jul 2012 16:22:05 -0700
+Subject: libceph: reset connection retry on successful negotiation
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 85effe183dd45854d1ad1a370b88cddb403c4c91)
+
+We exponentially back off when we encounter connection errors. If several
+errors accumulate, we will eventually wait ages before even trying to
+reconnect.
+
+Fix this by resetting the backoff counter after a successful negotiation/
+connection with the remote node. Fixes ceph issue #2802.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1629,6 +1629,8 @@ static int process_connect(struct ceph_c
+ if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
+ set_bit(LOSSYTX, &con->flags);
+
++ con->delay = 0; /* reset backoff memory */
++
+ prepare_read_tag(con);
+ break;
+
--- /dev/null
+From 2490465ce3b7da189f4d90cae93cea877cda8b51 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Fri, 20 Jul 2012 15:22:53 -0700
+Subject: libceph: fix fault locking; close socket on lossy fault
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 3b5ede07b55b52c3be27749d183d87257d032065)
+
+If we fault on a lossy connection, we should still close the socket
+immediately, and do so under the con mutex.
+
+We should also take the con mutex before printing out the state bits in
+the debug output.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2330,22 +2330,23 @@ fault:
+ */
+ static void ceph_fault(struct ceph_connection *con)
+ {
++ mutex_lock(&con->mutex);
++
+ pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+ dout("fault %p state %lu to peer %s\n",
+ con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
+
+- if (test_bit(LOSSYTX, &con->flags)) {
+- dout("fault on LOSSYTX channel\n");
+- goto out;
+- }
+-
+- mutex_lock(&con->mutex);
+ if (test_bit(CLOSED, &con->state))
+ goto out_unlock;
+
+ con_close_socket(con);
+
++ if (test_bit(LOSSYTX, &con->flags)) {
++ dout("fault on LOSSYTX channel\n");
++ goto out_unlock;
++ }
++
+ if (con->in_msg) {
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
+@@ -2392,7 +2393,6 @@ static void ceph_fault(struct ceph_conne
+
+ out_unlock:
+ mutex_unlock(&con->mutex);
+-out:
+ /*
+ * in case we faulted due to authentication, invalidate our
+ * current tickets so that we can get new ones.
--- /dev/null
+From 87052b587b32338846814dcd88328908b1e39c4c Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Fri, 20 Jul 2012 15:33:04 -0700
+Subject: libceph: move msgr clear_standby under con mutex protection
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 00650931e52e97fe64096bec167f5a6780dfd94a)
+
+Avoid dropping and retaking con->mutex in the ceph_con_send() case by
+leaving locking up to the caller.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2441,12 +2441,10 @@ static void clear_standby(struct ceph_co
+ {
+ /* come back from STANDBY? */
+ if (test_and_clear_bit(STANDBY, &con->state)) {
+- mutex_lock(&con->mutex);
+ dout("clear_standby %p and ++connect_seq\n", con);
+ con->connect_seq++;
+ WARN_ON(test_bit(WRITE_PENDING, &con->flags));
+ WARN_ON(test_bit(KEEPALIVE_PENDING, &con->flags));
+- mutex_unlock(&con->mutex);
+ }
+ }
+
+@@ -2483,11 +2481,12 @@ void ceph_con_send(struct ceph_connectio
+ le32_to_cpu(msg->hdr.front_len),
+ le32_to_cpu(msg->hdr.middle_len),
+ le32_to_cpu(msg->hdr.data_len));
++
++ clear_standby(con);
+ mutex_unlock(&con->mutex);
+
+ /* if there wasn't anything waiting to send before, queue
+ * new work */
+- clear_standby(con);
+ if (test_and_set_bit(WRITE_PENDING, &con->flags) == 0)
+ queue_con(con);
+ }
+@@ -2574,7 +2573,9 @@ void ceph_msg_revoke_incoming(struct cep
+ void ceph_con_keepalive(struct ceph_connection *con)
+ {
+ dout("con_keepalive %p\n", con);
++ mutex_lock(&con->mutex);
+ clear_standby(con);
++ mutex_unlock(&con->mutex);
+ if (test_and_set_bit(KEEPALIVE_PENDING, &con->flags) == 0 &&
+ test_and_set_bit(WRITE_PENDING, &con->flags) == 0)
+ queue_con(con);
--- /dev/null
+From 02d4200f8531bbf26faa35ba98591c603e2ccd2e Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Fri, 20 Jul 2012 15:34:04 -0700
+Subject: libceph: move ceph_con_send() closed check under the con mutex
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit a59b55a602b6c741052d79c1e3643f8440cddd27)
+
+Take the con mutex before checking whether the connection is closed to
+avoid racing with someone else closing it.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 16 +++++++---------
+ 1 file changed, 7 insertions(+), 9 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2453,22 +2453,20 @@ static void clear_standby(struct ceph_co
+ */
+ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
+ {
+- if (test_bit(CLOSED, &con->state)) {
+- dout("con_send %p closed, dropping %p\n", con, msg);
+- ceph_msg_put(msg);
+- return;
+- }
+-
+ /* set src+dst */
+ msg->hdr.src = con->msgr->inst.name;
+-
+ BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
+-
+ msg->needs_out_seq = true;
+
+- /* queue */
+ mutex_lock(&con->mutex);
+
++ if (test_bit(CLOSED, &con->state)) {
++ dout("con_send %p closed, dropping %p\n", con, msg);
++ ceph_msg_put(msg);
++ mutex_unlock(&con->mutex);
++ return;
++ }
++
+ BUG_ON(msg->con != NULL);
+ msg->con = con->ops->get(con);
+ BUG_ON(msg->con == NULL);
--- /dev/null
+From 1e25786c4f816c3a09d59e3a6860740f4370cc63 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Fri, 20 Jul 2012 15:40:04 -0700
+Subject: libceph: drop gratuitous socket close calls in con_work
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 2e8cb10063820af7ed7638e3fd9013eee21266e7)
+
+If the state is CLOSED or OPENING, we shouldn't have a socket.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2284,15 +2284,15 @@ restart:
+ dout("con_work %p STANDBY\n", con);
+ goto done;
+ }
+- if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
+- dout("con_work CLOSED\n");
+- con_close_socket(con);
++ if (test_bit(CLOSED, &con->state)) {
++ dout("con_work %p CLOSED\n", con);
++ BUG_ON(con->sock);
+ goto done;
+ }
+ if (test_and_clear_bit(OPENING, &con->state)) {
+ /* reopen w/ new peer */
+ dout("con_work OPENING\n");
+- con_close_socket(con);
++ BUG_ON(con->sock);
+ }
+
+ ret = try_read(con);
--- /dev/null
+From 642ff60bfc03e68b20a056bf9fbf0e690c9d4abe Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Fri, 20 Jul 2012 16:45:49 -0700
+Subject: libceph: close socket directly from ceph_con_close()
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit ee76e0736db8455e3b11827d6899bd2a4e1d0584)
+
+It is simpler to do this immediately, since we already hold the con mutex.
+It also avoids the need to deal with a not-quite-CLOSED socket in con_work.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -519,14 +519,8 @@ void ceph_con_close(struct ceph_connecti
+ reset_connection(con);
+ con->peer_global_seq = 0;
+ cancel_delayed_work(&con->work);
++ con_close_socket(con);
+ mutex_unlock(&con->mutex);
+-
+- /*
+- * We cannot close the socket directly from here because the
+- * work threads use it without holding the mutex. Instead, let
+- * con_work() do it.
+- */
+- queue_con(con);
+ }
+ EXPORT_SYMBOL(ceph_con_close);
+
--- /dev/null
+From 1b3ff237654e3636ea90c6f07871226069b6a809 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Fri, 20 Jul 2012 17:19:43 -0700
+Subject: libceph: drop unnecessary CLOSED check in socket state change
+ callback
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit d7353dd5aaf22ed611fbcd0d4a4a12fb30659290)
+
+If we are CLOSED, the socket is closed and we won't get these.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -296,9 +296,6 @@ static void ceph_sock_state_change(struc
+ dout("%s %p state = %lu sk_state = %u\n", __func__,
+ con, con->state, sk->sk_state);
+
+- if (test_bit(CLOSED, &con->state))
+- return;
+-
+ switch (sk->sk_state) {
+ case TCP_CLOSE:
+ dout("%s TCP_CLOSE\n", __func__);
--- /dev/null
+From 2317e7e4742c5c6292663873b30b4db306c84b58 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Fri, 20 Jul 2012 17:24:40 -0700
+Subject: libceph: replace connection state bits with states
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 8dacc7da69a491c515851e68de6036f21b5663ce)
+
+Use a simple set of 6 enumerated values for the connection states (CON_STATE_*)
+and use those instead of the state bits. All of the con->state checks are
+now under the protection of the con mutex, so this is safe. It also
+simplifies many of the state checks because we can check for anything other
+than the expected state instead of various bits for races we can think of.
+
+This appears to hold up well to stress testing both with and without socket
+failure injection on the server side.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h | 12 ---
+ net/ceph/messenger.c | 130 +++++++++++++++++++++--------------------
+ 2 files changed, 68 insertions(+), 74 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -117,18 +117,6 @@ struct ceph_msg_pos {
+ #define BACKOFF 15
+
+ /*
+- * ceph_connection states
+- */
+-#define CONNECTING 1
+-#define NEGOTIATING 2
+-#define CONNECTED 5
+-#define STANDBY 8 /* no outgoing messages, socket closed. we keep
+- * the ceph_connection around to maintain shared
+- * state with the peer. */
+-#define CLOSED 10 /* we've closed the connection */
+-#define OPENING 13 /* open connection w/ (possibly new) peer */
+-
+-/*
+ * A single connection with another host.
+ *
+ * We maintain a queue of outgoing messages, and some session state to
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -77,6 +77,17 @@
+ #define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */
+ #define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */
+
++/*
++ * connection states
++ */
++#define CON_STATE_CLOSED 1 /* -> PREOPEN */
++#define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */
++#define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */
++#define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */
++#define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */
++#define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */
++
++
+ /* static tag bytes (protocol control messages) */
+ static char tag_msg = CEPH_MSGR_TAG_MSG;
+ static char tag_ack = CEPH_MSGR_TAG_ACK;
+@@ -503,11 +514,7 @@ void ceph_con_close(struct ceph_connecti
+ mutex_lock(&con->mutex);
+ dout("con_close %p peer %s\n", con,
+ ceph_pr_addr(&con->peer_addr.in_addr));
+- clear_bit(NEGOTIATING, &con->state);
+- clear_bit(CONNECTING, &con->state);
+- clear_bit(CONNECTED, &con->state);
+- clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
+- set_bit(CLOSED, &con->state);
++ con->state = CON_STATE_CLOSED;
+
+ clear_bit(LOSSYTX, &con->flags); /* so we retry next connect */
+ clear_bit(KEEPALIVE_PENDING, &con->flags);
+@@ -530,8 +537,9 @@ void ceph_con_open(struct ceph_connectio
+ {
+ mutex_lock(&con->mutex);
+ dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
+- set_bit(OPENING, &con->state);
+- WARN_ON(!test_and_clear_bit(CLOSED, &con->state));
++
++ BUG_ON(con->state != CON_STATE_CLOSED);
++ con->state = CON_STATE_PREOPEN;
+
+ con->peer_name.type = (__u8) entity_type;
+ con->peer_name.num = cpu_to_le64(entity_num);
+@@ -571,7 +579,7 @@ void ceph_con_init(struct ceph_connectio
+ INIT_LIST_HEAD(&con->out_sent);
+ INIT_DELAYED_WORK(&con->work, con_work);
+
+- set_bit(CLOSED, &con->state);
++ con->state = CON_STATE_CLOSED;
+ }
+ EXPORT_SYMBOL(ceph_con_init);
+
+@@ -809,27 +817,21 @@ static struct ceph_auth_handshake *get_c
+ if (!con->ops->get_authorizer) {
+ con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
+ con->out_connect.authorizer_len = 0;
+-
+ return NULL;
+ }
+
+ /* Can't hold the mutex while getting authorizer */
+-
+ mutex_unlock(&con->mutex);
+-
+ auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
+-
+ mutex_lock(&con->mutex);
+
+ if (IS_ERR(auth))
+ return auth;
+- if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->flags))
++ if (con->state != CON_STATE_NEGOTIATING)
+ return ERR_PTR(-EAGAIN);
+
+ con->auth_reply_buf = auth->authorizer_reply_buf;
+ con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
+-
+-
+ return auth;
+ }
+
+@@ -1484,7 +1486,8 @@ static int process_banner(struct ceph_co
+ static void fail_protocol(struct ceph_connection *con)
+ {
+ reset_connection(con);
+- set_bit(CLOSED, &con->state); /* in case there's queued work */
++ BUG_ON(con->state != CON_STATE_NEGOTIATING);
++ con->state = CON_STATE_CLOSED;
+ }
+
+ static int process_connect(struct ceph_connection *con)
+@@ -1558,8 +1561,7 @@ static int process_connect(struct ceph_c
+ if (con->ops->peer_reset)
+ con->ops->peer_reset(con);
+ mutex_lock(&con->mutex);
+- if (test_bit(CLOSED, &con->state) ||
+- test_bit(OPENING, &con->state))
++ if (con->state != CON_STATE_NEGOTIATING)
+ return -EAGAIN;
+ break;
+
+@@ -1605,8 +1607,10 @@ static int process_connect(struct ceph_c
+ fail_protocol(con);
+ return -1;
+ }
+- clear_bit(NEGOTIATING, &con->state);
+- set_bit(CONNECTED, &con->state);
++
++ BUG_ON(con->state != CON_STATE_NEGOTIATING);
++ con->state = CON_STATE_OPEN;
++
+ con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
+ con->connect_seq++;
+ con->peer_features = server_feat;
+@@ -1994,8 +1998,9 @@ more:
+ dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
+
+ /* open the socket first? */
+- if (con->sock == NULL) {
+- set_bit(CONNECTING, &con->state);
++ if (con->state == CON_STATE_PREOPEN) {
++ BUG_ON(con->sock);
++ con->state = CON_STATE_CONNECTING;
+
+ con_out_kvec_reset(con);
+ prepare_write_banner(con);
+@@ -2046,8 +2051,7 @@ more_kvec:
+ }
+
+ do_next:
+- if (!test_bit(CONNECTING, &con->state) &&
+- !test_bit(NEGOTIATING, &con->state)) {
++ if (con->state == CON_STATE_OPEN) {
+ /* is anything else pending? */
+ if (!list_empty(&con->out_queue)) {
+ prepare_write_message(con);
+@@ -2081,29 +2085,19 @@ static int try_read(struct ceph_connecti
+ {
+ int ret = -1;
+
+- if (!con->sock)
+- return 0;
+-
+- if (test_bit(STANDBY, &con->state))
++more:
++ dout("try_read start on %p state %lu\n", con, con->state);
++ if (con->state != CON_STATE_CONNECTING &&
++ con->state != CON_STATE_NEGOTIATING &&
++ con->state != CON_STATE_OPEN)
+ return 0;
+
+- dout("try_read start on %p\n", con);
++ BUG_ON(!con->sock);
+
+-more:
+ dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
+ con->in_base_pos);
+
+- /*
+- * process_connect and process_message drop and re-take
+- * con->mutex. make sure we handle a racing close or reopen.
+- */
+- if (test_bit(CLOSED, &con->state) ||
+- test_bit(OPENING, &con->state)) {
+- ret = -EAGAIN;
+- goto out;
+- }
+-
+- if (test_bit(CONNECTING, &con->state)) {
++ if (con->state == CON_STATE_CONNECTING) {
+ dout("try_read connecting\n");
+ ret = read_partial_banner(con);
+ if (ret <= 0)
+@@ -2112,8 +2106,8 @@ more:
+ if (ret < 0)
+ goto out;
+
+- clear_bit(CONNECTING, &con->state);
+- set_bit(NEGOTIATING, &con->state);
++ BUG_ON(con->state != CON_STATE_CONNECTING);
++ con->state = CON_STATE_NEGOTIATING;
+
+ /* Banner is good, exchange connection info */
+ ret = prepare_write_connect(con);
+@@ -2125,7 +2119,7 @@ more:
+ goto out;
+ }
+
+- if (test_bit(NEGOTIATING, &con->state)) {
++ if (con->state == CON_STATE_NEGOTIATING) {
+ dout("try_read negotiating\n");
+ ret = read_partial_connect(con);
+ if (ret <= 0)
+@@ -2136,6 +2130,8 @@ more:
+ goto more;
+ }
+
++ BUG_ON(con->state != CON_STATE_OPEN);
++
+ if (con->in_base_pos < 0) {
+ /*
+ * skipping + discarding content.
+@@ -2169,8 +2165,8 @@ more:
+ prepare_read_ack(con);
+ break;
+ case CEPH_MSGR_TAG_CLOSE:
+- clear_bit(CONNECTED, &con->state);
+- set_bit(CLOSED, &con->state); /* fixme */
++ con_close_socket(con);
++ con->state = CON_STATE_CLOSED;
+ goto out;
+ default:
+ goto bad_tag;
+@@ -2246,14 +2242,21 @@ static void con_work(struct work_struct
+ mutex_lock(&con->mutex);
+ restart:
+ if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) {
+- if (test_and_clear_bit(CONNECTED, &con->state))
+- con->error_msg = "socket closed";
+- else if (test_and_clear_bit(NEGOTIATING, &con->state))
+- con->error_msg = "negotiation failed";
+- else if (test_and_clear_bit(CONNECTING, &con->state))
++ switch (con->state) {
++ case CON_STATE_CONNECTING:
+ con->error_msg = "connection failed";
+- else
++ break;
++ case CON_STATE_NEGOTIATING:
++ con->error_msg = "negotiation failed";
++ break;
++ case CON_STATE_OPEN:
++ con->error_msg = "socket closed";
++ break;
++ default:
++ dout("unrecognized con state %d\n", (int)con->state);
+ con->error_msg = "unrecognized con state";
++ BUG();
++ }
+ goto fault;
+ }
+
+@@ -2271,17 +2274,16 @@ restart:
+ }
+ }
+
+- if (test_bit(STANDBY, &con->state)) {
++ if (con->state == CON_STATE_STANDBY) {
+ dout("con_work %p STANDBY\n", con);
+ goto done;
+ }
+- if (test_bit(CLOSED, &con->state)) {
++ if (con->state == CON_STATE_CLOSED) {
+ dout("con_work %p CLOSED\n", con);
+ BUG_ON(con->sock);
+ goto done;
+ }
+- if (test_and_clear_bit(OPENING, &con->state)) {
+- /* reopen w/ new peer */
++ if (con->state == CON_STATE_PREOPEN) {
+ dout("con_work OPENING\n");
+ BUG_ON(con->sock);
+ }
+@@ -2328,13 +2330,15 @@ static void ceph_fault(struct ceph_conne
+ dout("fault %p state %lu to peer %s\n",
+ con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
+
+- if (test_bit(CLOSED, &con->state))
+- goto out_unlock;
++ BUG_ON(con->state != CON_STATE_CONNECTING &&
++ con->state != CON_STATE_NEGOTIATING &&
++ con->state != CON_STATE_OPEN);
+
+ con_close_socket(con);
+
+ if (test_bit(LOSSYTX, &con->flags)) {
+- dout("fault on LOSSYTX channel\n");
++ dout("fault on LOSSYTX channel, marking CLOSED\n");
++ con->state = CON_STATE_CLOSED;
+ goto out_unlock;
+ }
+
+@@ -2355,9 +2359,10 @@ static void ceph_fault(struct ceph_conne
+ !test_bit(KEEPALIVE_PENDING, &con->flags)) {
+ dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
+ clear_bit(WRITE_PENDING, &con->flags);
+- set_bit(STANDBY, &con->state);
++ con->state = CON_STATE_STANDBY;
+ } else {
+ /* retry after a delay. */
++ con->state = CON_STATE_PREOPEN;
+ if (con->delay == 0)
+ con->delay = BASE_DELAY_INTERVAL;
+ else if (con->delay < MAX_DELAY_INTERVAL)
+@@ -2431,8 +2436,9 @@ EXPORT_SYMBOL(ceph_messenger_init);
+ static void clear_standby(struct ceph_connection *con)
+ {
+ /* come back from STANDBY? */
+- if (test_and_clear_bit(STANDBY, &con->state)) {
++ if (con->state == CON_STATE_STANDBY) {
+ dout("clear_standby %p and ++connect_seq\n", con);
++ con->state = CON_STATE_PREOPEN;
+ con->connect_seq++;
+ WARN_ON(test_bit(WRITE_PENDING, &con->flags));
+ WARN_ON(test_bit(KEEPALIVE_PENDING, &con->flags));
+@@ -2451,7 +2457,7 @@ void ceph_con_send(struct ceph_connectio
+
+ mutex_lock(&con->mutex);
+
+- if (test_bit(CLOSED, &con->state)) {
++ if (con->state == CON_STATE_CLOSED) {
+ dout("con_send %p closed, dropping %p\n", con, msg);
+ ceph_msg_put(msg);
+ mutex_unlock(&con->mutex);
--- /dev/null
+From ecbcf07ec09575e41ebc9194bfc353773477fcd0 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Fri, 20 Jul 2012 17:29:55 -0700
+Subject: libceph: clean up con flags
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 4a8616920860920abaa51193146fe36b38ef09aa)
+
+Rename flags with CON_FLAG prefix, move the definitions into the c file,
+and (better) document their meaning.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h | 10 ------
+ net/ceph/messenger.c | 62 +++++++++++++++++++++++------------------
+ 2 files changed, 36 insertions(+), 36 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -107,16 +107,6 @@ struct ceph_msg_pos {
+ #define MAX_DELAY_INTERVAL (5 * 60 * HZ)
+
+ /*
+- * ceph_connection flag bits
+- */
+-
+-#define LOSSYTX 0 /* we can close channel or drop messages on errors */
+-#define KEEPALIVE_PENDING 3
+-#define WRITE_PENDING 4 /* we have data ready to send */
+-#define SOCK_CLOSED 11 /* socket state changed to closed */
+-#define BACKOFF 15
+-
+-/*
+ * A single connection with another host.
+ *
+ * We maintain a queue of outgoing messages, and some session state to
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -87,6 +87,15 @@
+ #define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */
+ #define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */
+
++/*
++ * ceph_connection flag bits
++ */
++#define CON_FLAG_LOSSYTX 0 /* we can close channel or drop
++ * messages on errors */
++#define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
++#define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */
++#define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */
++#define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */
+
+ /* static tag bytes (protocol control messages) */
+ static char tag_msg = CEPH_MSGR_TAG_MSG;
+@@ -288,7 +297,7 @@ static void ceph_sock_write_space(struct
+ * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
+ * and net/core/stream.c:sk_stream_write_space().
+ */
+- if (test_bit(WRITE_PENDING, &con->flags)) {
++ if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) {
+ if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+ dout("%s %p queueing write work\n", __func__, con);
+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+@@ -313,7 +322,7 @@ static void ceph_sock_state_change(struc
+ case TCP_CLOSE_WAIT:
+ dout("%s TCP_CLOSE_WAIT\n", __func__);
+ con_sock_state_closing(con);
+- set_bit(SOCK_CLOSED, &con->flags);
++ set_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
+ queue_con(con);
+ break;
+ case TCP_ESTABLISHED:
+@@ -449,12 +458,12 @@ static int con_close_socket(struct ceph_
+ con->sock = NULL;
+
+ /*
+- * Forcibly clear the SOCK_CLOSE flag. It gets set
++ * Forcibly clear the SOCK_CLOSED flag. It gets set
+ * independent of the connection mutex, and we could have
+ * received a socket close event before we had the chance to
+ * shut the socket down.
+ */
+- clear_bit(SOCK_CLOSED, &con->flags);
++ clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
+ con_sock_state_closed(con);
+ return rc;
+ }
+@@ -516,9 +525,9 @@ void ceph_con_close(struct ceph_connecti
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ con->state = CON_STATE_CLOSED;
+
+- clear_bit(LOSSYTX, &con->flags); /* so we retry next connect */
+- clear_bit(KEEPALIVE_PENDING, &con->flags);
+- clear_bit(WRITE_PENDING, &con->flags);
++ clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */
++ clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
++ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+
+ reset_connection(con);
+ con->peer_global_seq = 0;
+@@ -770,7 +779,7 @@ static void prepare_write_message(struct
+ /* no, queue up footer too and be done */
+ prepare_write_message_footer(con);
+
+- set_bit(WRITE_PENDING, &con->flags);
++ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+ }
+
+ /*
+@@ -791,7 +800,7 @@ static void prepare_write_ack(struct cep
+ &con->out_temp_ack);
+
+ con->out_more = 1; /* more will follow.. eventually.. */
+- set_bit(WRITE_PENDING, &con->flags);
++ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+ }
+
+ /*
+@@ -802,7 +811,7 @@ static void prepare_write_keepalive(stru
+ dout("prepare_write_keepalive %p\n", con);
+ con_out_kvec_reset(con);
+ con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
+- set_bit(WRITE_PENDING, &con->flags);
++ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+ }
+
+ /*
+@@ -845,7 +854,7 @@ static void prepare_write_banner(struct
+ &con->msgr->my_enc_addr);
+
+ con->out_more = 0;
+- set_bit(WRITE_PENDING, &con->flags);
++ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+ }
+
+ static int prepare_write_connect(struct ceph_connection *con)
+@@ -896,7 +905,7 @@ static int prepare_write_connect(struct
+ auth->authorizer_buf);
+
+ con->out_more = 0;
+- set_bit(WRITE_PENDING, &con->flags);
++ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+
+ return 0;
+ }
+@@ -1622,7 +1631,7 @@ static int process_connect(struct ceph_c
+ le32_to_cpu(con->in_reply.connect_seq));
+
+ if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
+- set_bit(LOSSYTX, &con->flags);
++ set_bit(CON_FLAG_LOSSYTX, &con->flags);
+
+ con->delay = 0; /* reset backoff memory */
+
+@@ -2061,14 +2070,15 @@ do_next:
+ prepare_write_ack(con);
+ goto more;
+ }
+- if (test_and_clear_bit(KEEPALIVE_PENDING, &con->flags)) {
++ if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING,
++ &con->flags)) {
+ prepare_write_keepalive(con);
+ goto more;
+ }
+ }
+
+ /* Nothing to do! */
+- clear_bit(WRITE_PENDING, &con->flags);
++ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+ dout("try_write nothing else to write.\n");
+ ret = 0;
+ out:
+@@ -2241,7 +2251,7 @@ static void con_work(struct work_struct
+
+ mutex_lock(&con->mutex);
+ restart:
+- if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) {
++ if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) {
+ switch (con->state) {
+ case CON_STATE_CONNECTING:
+ con->error_msg = "connection failed";
+@@ -2260,7 +2270,7 @@ restart:
+ goto fault;
+ }
+
+- if (test_and_clear_bit(BACKOFF, &con->flags)) {
++ if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) {
+ dout("con_work %p backing off\n", con);
+ if (queue_delayed_work(ceph_msgr_wq, &con->work,
+ round_jiffies_relative(con->delay))) {
+@@ -2336,7 +2346,7 @@ static void ceph_fault(struct ceph_conne
+
+ con_close_socket(con);
+
+- if (test_bit(LOSSYTX, &con->flags)) {
++ if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) {
+ dout("fault on LOSSYTX channel, marking CLOSED\n");
+ con->state = CON_STATE_CLOSED;
+ goto out_unlock;
+@@ -2356,9 +2366,9 @@ static void ceph_fault(struct ceph_conne
+ /* If there are no messages queued or keepalive pending, place
+ * the connection in a STANDBY state */
+ if (list_empty(&con->out_queue) &&
+- !test_bit(KEEPALIVE_PENDING, &con->flags)) {
++ !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) {
+ dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
+- clear_bit(WRITE_PENDING, &con->flags);
++ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+ con->state = CON_STATE_STANDBY;
+ } else {
+ /* retry after a delay. */
+@@ -2383,7 +2393,7 @@ static void ceph_fault(struct ceph_conne
+ * that when con_work restarts we schedule the
+ * delay then.
+ */
+- set_bit(BACKOFF, &con->flags);
++ set_bit(CON_FLAG_BACKOFF, &con->flags);
+ }
+ }
+
+@@ -2440,8 +2450,8 @@ static void clear_standby(struct ceph_co
+ dout("clear_standby %p and ++connect_seq\n", con);
+ con->state = CON_STATE_PREOPEN;
+ con->connect_seq++;
+- WARN_ON(test_bit(WRITE_PENDING, &con->flags));
+- WARN_ON(test_bit(KEEPALIVE_PENDING, &con->flags));
++ WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags));
++ WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags));
+ }
+ }
+
+@@ -2482,7 +2492,7 @@ void ceph_con_send(struct ceph_connectio
+
+ /* if there wasn't anything waiting to send before, queue
+ * new work */
+- if (test_and_set_bit(WRITE_PENDING, &con->flags) == 0)
++ if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
+ queue_con(con);
+ }
+ EXPORT_SYMBOL(ceph_con_send);
+@@ -2571,8 +2581,8 @@ void ceph_con_keepalive(struct ceph_conn
+ mutex_lock(&con->mutex);
+ clear_standby(con);
+ mutex_unlock(&con->mutex);
+- if (test_and_set_bit(KEEPALIVE_PENDING, &con->flags) == 0 &&
+- test_and_set_bit(WRITE_PENDING, &con->flags) == 0)
++ if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 &&
++ test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
+ queue_con(con);
+ }
+ EXPORT_SYMBOL(ceph_con_keepalive);
--- /dev/null
+From 9f2d0c057f5e85a789258cccfedcf52fb28f5c9c Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Fri, 20 Jul 2012 17:30:40 -0700
+Subject: libceph: clear all flags on con_close
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+(cherry picked from commit 43c7427d100769451601b8a36988ac0528ce0124)
+---
+ net/ceph/messenger.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -528,6 +528,8 @@ void ceph_con_close(struct ceph_connecti
+ clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */
+ clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
+ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
++ clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
++ clear_bit(CON_FLAG_BACKOFF, &con->flags);
+
+ reset_connection(con);
+ con->peer_global_seq = 0;
0077-libceph-distinguish-two-phases-of-connect-sequence.patch
0078-libceph-small-changes-to-messenger.c.patch
0079-libceph-add-some-fine-ASCII-art.patch
+0080-libceph-set-peer-name-on-con_open-not-init.patch
+0081-libceph-initialize-mon_client-con-only-once.patch
+0082-libceph-allow-sock-transition-from-CONNECTING-to-CLO.patch
+0083-libceph-initialize-msgpool-message-types.patch
+0084-libceph-prevent-the-race-of-incoming-work-during-tea.patch
+0085-libceph-report-socket-read-write-error-message.patch
+0086-libceph-fix-mutex-coverage-for-ceph_con_close.patch
+0087-libceph-resubmit-linger-ops-when-pg-mapping-changes.patch
+0088-libceph-re-initialize-bio_iter-on-start-of-message-r.patch
+0089-libceph-protect-ceph_con_open-with-mutex.patch
+0090-libceph-reset-connection-retry-on-successfully-negot.patch
+0091-libceph-fix-fault-locking-close-socket-on-lossy-faul.patch
+0092-libceph-move-msgr-clear_standby-under-con-mutex-prot.patch
+0093-libceph-move-ceph_con_send-closed-check-under-the-co.patch
+0094-libceph-drop-gratuitous-socket-close-calls-in-con_wo.patch
+0095-libceph-close-socket-directly-from-ceph_con_close.patch
+0096-libceph-drop-unnecessary-CLOSED-check-in-socket-stat.patch
+0097-libceph-replace-connection-state-bits-with-states.patch
+0098-libceph-clean-up-con-flags.patch
+0099-libceph-clear-all-flags-on-con_close.patch