--- /dev/null
+From 5566f701581eecb9bb825d4db233256106ae9bd6 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Tue, 22 May 2012 11:41:43 -0500
+Subject: libceph: rename socket callbacks
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 327800bdc2cb9b71f4b458ca07aa9d522668dde0)
+
+Change the names of the three socket callback functions to make it
+more obvious they're specifically associated with a connection's
+socket (not the ceph connection that uses it).
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 28 ++++++++++++++--------------
+ 1 file changed, 14 insertions(+), 14 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -153,46 +153,46 @@ EXPORT_SYMBOL(ceph_msgr_flush);
+ */
+
+ /* data available on socket, or listen socket received a connect */
+-static void ceph_data_ready(struct sock *sk, int count_unused)
++static void ceph_sock_data_ready(struct sock *sk, int count_unused)
+ {
+ struct ceph_connection *con = sk->sk_user_data;
+
+ if (sk->sk_state != TCP_CLOSE_WAIT) {
+- dout("ceph_data_ready on %p state = %lu, queueing work\n",
++ dout("%s on %p state = %lu, queueing work\n", __func__,
+ con, con->state);
+ queue_con(con);
+ }
+ }
+
+ /* socket has buffer space for writing */
+-static void ceph_write_space(struct sock *sk)
++static void ceph_sock_write_space(struct sock *sk)
+ {
+ struct ceph_connection *con = sk->sk_user_data;
+
+ /* only queue to workqueue if there is data we want to write,
+ * and there is sufficient space in the socket buffer to accept
+- * more data. clear SOCK_NOSPACE so that ceph_write_space()
++ * more data. clear SOCK_NOSPACE so that ceph_sock_write_space()
+ * doesn't get called again until try_write() fills the socket
+ * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
+ * and net/core/stream.c:sk_stream_write_space().
+ */
+ if (test_bit(WRITE_PENDING, &con->state)) {
+ if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+- dout("ceph_write_space %p queueing write work\n", con);
++ dout("%s %p queueing write work\n", __func__, con);
+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ queue_con(con);
+ }
+ } else {
+- dout("ceph_write_space %p nothing to write\n", con);
++ dout("%s %p nothing to write\n", __func__, con);
+ }
+ }
+
+ /* socket's state has changed */
+-static void ceph_state_change(struct sock *sk)
++static void ceph_sock_state_change(struct sock *sk)
+ {
+ struct ceph_connection *con = sk->sk_user_data;
+
+- dout("ceph_state_change %p state = %lu sk_state = %u\n",
++ dout("%s %p state = %lu sk_state = %u\n", __func__,
+ con, con->state, sk->sk_state);
+
+ if (test_bit(CLOSED, &con->state))
+@@ -200,9 +200,9 @@ static void ceph_state_change(struct soc
+
+ switch (sk->sk_state) {
+ case TCP_CLOSE:
+- dout("ceph_state_change TCP_CLOSE\n");
++ dout("%s TCP_CLOSE\n", __func__);
+ case TCP_CLOSE_WAIT:
+- dout("ceph_state_change TCP_CLOSE_WAIT\n");
++ dout("%s TCP_CLOSE_WAIT\n", __func__);
+ if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) {
+ if (test_bit(CONNECTING, &con->state))
+ con->error_msg = "connection failed";
+@@ -212,7 +212,7 @@ static void ceph_state_change(struct soc
+ }
+ break;
+ case TCP_ESTABLISHED:
+- dout("ceph_state_change TCP_ESTABLISHED\n");
++ dout("%s TCP_ESTABLISHED\n", __func__);
+ queue_con(con);
+ break;
+ default: /* Everything else is uninteresting */
+@@ -228,9 +228,9 @@ static void set_sock_callbacks(struct so
+ {
+ struct sock *sk = sock->sk;
+ sk->sk_user_data = con;
+- sk->sk_data_ready = ceph_data_ready;
+- sk->sk_write_space = ceph_write_space;
+- sk->sk_state_change = ceph_state_change;
++ sk->sk_data_ready = ceph_sock_data_ready;
++ sk->sk_write_space = ceph_sock_write_space;
++ sk->sk_state_change = ceph_sock_state_change;
+ }
+
+
--- /dev/null
+From 2a06b676ea95fd6a85292a8b93f205867f4cbfef Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 23 May 2012 14:35:23 -0500
+Subject: libceph: rename kvec_reset and kvec_add functions
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit e22004235a900213625acd6583ac913d5a30c155)
+
+The functions ceph_con_out_kvec_reset() and ceph_con_out_kvec_add()
+are entirely private functions, so drop the "ceph_" prefix in their
+name to make them slightly more wieldy.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 48 ++++++++++++++++++++++++------------------------
+ 1 file changed, 24 insertions(+), 24 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -486,14 +486,14 @@ static u32 get_global_seq(struct ceph_me
+ return ret;
+ }
+
+-static void ceph_con_out_kvec_reset(struct ceph_connection *con)
++static void con_out_kvec_reset(struct ceph_connection *con)
+ {
+ con->out_kvec_left = 0;
+ con->out_kvec_bytes = 0;
+ con->out_kvec_cur = &con->out_kvec[0];
+ }
+
+-static void ceph_con_out_kvec_add(struct ceph_connection *con,
++static void con_out_kvec_add(struct ceph_connection *con,
+ size_t size, void *data)
+ {
+ int index;
+@@ -534,7 +534,7 @@ static void prepare_write_message(struct
+ struct ceph_msg *m;
+ u32 crc;
+
+- ceph_con_out_kvec_reset(con);
++ con_out_kvec_reset(con);
+ con->out_kvec_is_msg = true;
+ con->out_msg_done = false;
+
+@@ -542,9 +542,9 @@ static void prepare_write_message(struct
+ * TCP packet that's a good thing. */
+ if (con->in_seq > con->in_seq_acked) {
+ con->in_seq_acked = con->in_seq;
+- ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
++ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+ con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+- ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
++ con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ &con->out_temp_ack);
+ }
+
+@@ -576,12 +576,12 @@ static void prepare_write_message(struct
+ BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
+
+ /* tag + hdr + front + middle */
+- ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
+- ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+- ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
++ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
++ con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
++ con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
+
+ if (m->middle)
+- ceph_con_out_kvec_add(con, m->middle->vec.iov_len,
++ con_out_kvec_add(con, m->middle->vec.iov_len,
+ m->middle->vec.iov_base);
+
+ /* fill in crc (except data pages), footer */
+@@ -630,12 +630,12 @@ static void prepare_write_ack(struct cep
+ con->in_seq_acked, con->in_seq);
+ con->in_seq_acked = con->in_seq;
+
+- ceph_con_out_kvec_reset(con);
++ con_out_kvec_reset(con);
+
+- ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
++ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+
+ con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+- ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
++ con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ &con->out_temp_ack);
+
+ con->out_more = 1; /* more will follow.. eventually.. */
+@@ -648,8 +648,8 @@ static void prepare_write_ack(struct cep
+ static void prepare_write_keepalive(struct ceph_connection *con)
+ {
+ dout("prepare_write_keepalive %p\n", con);
+- ceph_con_out_kvec_reset(con);
+- ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
++ con_out_kvec_reset(con);
++ con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
+ set_bit(WRITE_PENDING, &con->state);
+ }
+
+@@ -694,8 +694,8 @@ static struct ceph_auth_handshake *get_c
+ */
+ static void prepare_write_banner(struct ceph_connection *con)
+ {
+- ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
+- ceph_con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
++ con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
++ con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
+ &con->msgr->my_enc_addr);
+
+ con->out_more = 0;
+@@ -742,10 +742,10 @@ static int prepare_write_connect(struct
+ con->out_connect.authorizer_len = auth ?
+ cpu_to_le32(auth->authorizer_buf_len) : 0;
+
+- ceph_con_out_kvec_add(con, sizeof (con->out_connect),
++ con_out_kvec_add(con, sizeof (con->out_connect),
+ &con->out_connect);
+ if (auth && auth->authorizer_buf_len)
+- ceph_con_out_kvec_add(con, auth->authorizer_buf_len,
++ con_out_kvec_add(con, auth->authorizer_buf_len,
+ auth->authorizer_buf);
+
+ con->out_more = 0;
+@@ -939,7 +939,7 @@ static int write_partial_msg_pages(struc
+ /* prepare and queue up footer, too */
+ if (!do_datacrc)
+ con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
+- ceph_con_out_kvec_reset(con);
++ con_out_kvec_reset(con);
+ prepare_write_message_footer(con);
+ ret = 1;
+ out:
+@@ -1402,7 +1402,7 @@ static int process_connect(struct ceph_c
+ return -1;
+ }
+ con->auth_retry = 1;
+- ceph_con_out_kvec_reset(con);
++ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+@@ -1423,7 +1423,7 @@ static int process_connect(struct ceph_c
+ ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ reset_connection(con);
+- ceph_con_out_kvec_reset(con);
++ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+@@ -1449,7 +1449,7 @@ static int process_connect(struct ceph_c
+ le32_to_cpu(con->out_connect.connect_seq),
+ le32_to_cpu(con->in_reply.connect_seq));
+ con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
+- ceph_con_out_kvec_reset(con);
++ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+@@ -1466,7 +1466,7 @@ static int process_connect(struct ceph_c
+ le32_to_cpu(con->in_reply.global_seq));
+ get_global_seq(con->msgr,
+ le32_to_cpu(con->in_reply.global_seq));
+- ceph_con_out_kvec_reset(con);
++ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+@@ -1873,7 +1873,7 @@ more:
+
+ /* open the socket first? */
+ if (con->sock == NULL) {
+- ceph_con_out_kvec_reset(con);
++ con_out_kvec_reset(con);
+ prepare_write_banner(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
--- /dev/null
+From ab20b55d471452332ce9e1b76ea7a522999e2055 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Sat, 26 May 2012 23:26:43 -0500
+Subject: libceph: embed ceph messenger structure in ceph_client
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 15d9882c336db2db73ccf9871ae2398e452f694c)
+
+A ceph client has a pointer to a ceph messenger structure in it.
+There is always exactly one ceph messenger for a ceph client, so
+there is no need to allocate it separately from the ceph client
+structure.
+
+Switch the ceph_client structure to embed its ceph_messenger
+structure.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/mds_client.c | 2 +-
+ include/linux/ceph/libceph.h | 2 +-
+ include/linux/ceph/messenger.h | 9 +++++----
+ net/ceph/ceph_common.c | 18 +++++-------------
+ net/ceph/messenger.c | 30 +++++++++---------------------
+ net/ceph/mon_client.c | 6 +++---
+ net/ceph/osd_client.c | 4 ++--
+ 7 files changed, 26 insertions(+), 45 deletions(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -394,7 +394,7 @@ static struct ceph_mds_session *register
+ s->s_seq = 0;
+ mutex_init(&s->s_mutex);
+
+- ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
++ ceph_con_init(&mdsc->fsc->client->msgr, &s->s_con);
+ s->s_con.private = s;
+ s->s_con.ops = &mds_con_ops;
+ s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
+--- a/include/linux/ceph/libceph.h
++++ b/include/linux/ceph/libceph.h
+@@ -132,7 +132,7 @@ struct ceph_client {
+ u32 supported_features;
+ u32 required_features;
+
+- struct ceph_messenger *msgr; /* messenger instance */
++ struct ceph_messenger msgr; /* messenger instance */
+ struct ceph_mon_client monc;
+ struct ceph_osd_client osdc;
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -203,10 +203,11 @@ extern int ceph_msgr_init(void);
+ extern void ceph_msgr_exit(void);
+ extern void ceph_msgr_flush(void);
+
+-extern struct ceph_messenger *ceph_messenger_create(
+- struct ceph_entity_addr *myaddr,
+- u32 features, u32 required);
+-extern void ceph_messenger_destroy(struct ceph_messenger *);
++extern void ceph_messenger_init(struct ceph_messenger *msgr,
++ struct ceph_entity_addr *myaddr,
++ u32 supported_features,
++ u32 required_features,
++ bool nocrc);
+
+ extern void ceph_con_init(struct ceph_messenger *msgr,
+ struct ceph_connection *con);
+--- a/net/ceph/ceph_common.c
++++ b/net/ceph/ceph_common.c
+@@ -468,19 +468,15 @@ struct ceph_client *ceph_create_client(s
+ /* msgr */
+ if (ceph_test_opt(client, MYIP))
+ myaddr = &client->options->my_addr;
+- client->msgr = ceph_messenger_create(myaddr,
+- client->supported_features,
+- client->required_features);
+- if (IS_ERR(client->msgr)) {
+- err = PTR_ERR(client->msgr);
+- goto fail;
+- }
+- client->msgr->nocrc = ceph_test_opt(client, NOCRC);
++ ceph_messenger_init(&client->msgr, myaddr,
++ client->supported_features,
++ client->required_features,
++ ceph_test_opt(client, NOCRC));
+
+ /* subsystems */
+ err = ceph_monc_init(&client->monc, client);
+ if (err < 0)
+- goto fail_msgr;
++ goto fail;
+ err = ceph_osdc_init(&client->osdc, client);
+ if (err < 0)
+ goto fail_monc;
+@@ -489,8 +485,6 @@ struct ceph_client *ceph_create_client(s
+
+ fail_monc:
+ ceph_monc_stop(&client->monc);
+-fail_msgr:
+- ceph_messenger_destroy(client->msgr);
+ fail:
+ kfree(client);
+ return ERR_PTR(err);
+@@ -508,8 +502,6 @@ void ceph_destroy_client(struct ceph_cli
+
+ ceph_debugfs_client_cleanup(client);
+
+- ceph_messenger_destroy(client->msgr);
+-
+ ceph_destroy_options(client->options);
+
+ kfree(client);
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2249,18 +2249,14 @@ out:
+
+
+ /*
+- * create a new messenger instance
++ * initialize a new messenger instance
+ */
+-struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
+- u32 supported_features,
+- u32 required_features)
++void ceph_messenger_init(struct ceph_messenger *msgr,
++ struct ceph_entity_addr *myaddr,
++ u32 supported_features,
++ u32 required_features,
++ bool nocrc)
+ {
+- struct ceph_messenger *msgr;
+-
+- msgr = kzalloc(sizeof(*msgr), GFP_KERNEL);
+- if (msgr == NULL)
+- return ERR_PTR(-ENOMEM);
+-
+ msgr->supported_features = supported_features;
+ msgr->required_features = required_features;
+
+@@ -2273,19 +2269,11 @@ struct ceph_messenger *ceph_messenger_cr
+ msgr->inst.addr.type = 0;
+ get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
+ encode_my_addr(msgr);
++ msgr->nocrc = nocrc;
+
+- dout("messenger_create %p\n", msgr);
+- return msgr;
+-}
+-EXPORT_SYMBOL(ceph_messenger_create);
+-
+-void ceph_messenger_destroy(struct ceph_messenger *msgr)
+-{
+- dout("destroy %p\n", msgr);
+- kfree(msgr);
+- dout("destroyed messenger %p\n", msgr);
++ dout("%s %p\n", __func__, msgr);
+ }
+-EXPORT_SYMBOL(ceph_messenger_destroy);
++EXPORT_SYMBOL(ceph_messenger_init);
+
+ static void clear_standby(struct ceph_connection *con)
+ {
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -763,7 +763,7 @@ int ceph_monc_init(struct ceph_mon_clien
+ monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
+ if (!monc->con)
+ goto out_monmap;
+- ceph_con_init(monc->client->msgr, monc->con);
++ ceph_con_init(&monc->client->msgr, monc->con);
+ monc->con->private = monc;
+ monc->con->ops = &mon_con_ops;
+
+@@ -888,8 +888,8 @@ static void handle_auth_reply(struct cep
+ } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
+ dout("authenticated, starting session\n");
+
+- monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
+- monc->client->msgr->inst.name.num =
++ monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
++ monc->client->msgr.inst.name.num =
+ cpu_to_le64(monc->auth->global_id);
+
+ __send_subscribe(monc);
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -639,7 +639,7 @@ static struct ceph_osd *create_osd(struc
+ INIT_LIST_HEAD(&osd->o_osd_lru);
+ osd->o_incarnation = 1;
+
+- ceph_con_init(osdc->client->msgr, &osd->o_con);
++ ceph_con_init(&osdc->client->msgr, &osd->o_con);
+ osd->o_con.private = osd;
+ osd->o_con.ops = &osd_con_ops;
+ osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
+@@ -1391,7 +1391,7 @@ void ceph_osdc_handle_map(struct ceph_os
+ epoch, maplen);
+ newmap = osdmap_apply_incremental(&p, next,
+ osdc->osdmap,
+- osdc->client->msgr);
++ &osdc->client->msgr);
+ if (IS_ERR(newmap)) {
+ err = PTR_ERR(newmap);
+ goto bad;
--- /dev/null
+From f47510a7851c8440845a0c1a7184323cc10ec15d Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Tue, 22 May 2012 11:41:43 -0500
+Subject: libceph: start separating connection flags from state
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 928443cd9644e7cfd46f687dbeffda2d1a357ff9)
+
+A ceph_connection holds a mixture of connection state (as in "state
+machine" state) and connection flags in a single "state" field. To
+make the distinction more clear, define a new "flags" field and use
+it rather than the "state" field to hold Boolean flag values.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h | 18 +++++++++-----
+ net/ceph/messenger.c | 50 ++++++++++++++++++++---------------------
+ 2 files changed, 37 insertions(+), 31 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -103,20 +103,25 @@ struct ceph_msg_pos {
+ #define MAX_DELAY_INTERVAL (5 * 60 * HZ)
+
+ /*
+- * ceph_connection state bit flags
++ * ceph_connection flag bits
+ */
++
+ #define LOSSYTX 0 /* we can close channel or drop messages on errors */
+-#define CONNECTING 1
+-#define NEGOTIATING 2
+ #define KEEPALIVE_PENDING 3
+ #define WRITE_PENDING 4 /* we have data ready to send */
++#define SOCK_CLOSED 11 /* socket state changed to closed */
++#define BACKOFF 15
++
++/*
++ * ceph_connection states
++ */
++#define CONNECTING 1
++#define NEGOTIATING 2
+ #define STANDBY 8 /* no outgoing messages, socket closed. we keep
+ * the ceph_connection around to maintain shared
+ * state with the peer. */
+ #define CLOSED 10 /* we've closed the connection */
+-#define SOCK_CLOSED 11 /* socket state changed to closed */
+ #define OPENING 13 /* open connection w/ (possibly new) peer */
+-#define BACKOFF 15
+
+ /*
+ * A single connection with another host.
+@@ -133,7 +138,8 @@ struct ceph_connection {
+
+ struct ceph_messenger *msgr;
+ struct socket *sock;
+- unsigned long state; /* connection state (see flags above) */
++ unsigned long flags;
++ unsigned long state;
+ const char *error_msg; /* error message, if any */
+
+ struct ceph_entity_addr peer_addr; /* peer address */
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -176,7 +176,7 @@ static void ceph_sock_write_space(struct
+ * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
+ * and net/core/stream.c:sk_stream_write_space().
+ */
+- if (test_bit(WRITE_PENDING, &con->state)) {
++ if (test_bit(WRITE_PENDING, &con->flags)) {
+ if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+ dout("%s %p queueing write work\n", __func__, con);
+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+@@ -203,7 +203,7 @@ static void ceph_sock_state_change(struc
+ dout("%s TCP_CLOSE\n", __func__);
+ case TCP_CLOSE_WAIT:
+ dout("%s TCP_CLOSE_WAIT\n", __func__);
+- if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) {
++ if (test_and_set_bit(SOCK_CLOSED, &con->flags) == 0) {
+ if (test_bit(CONNECTING, &con->state))
+ con->error_msg = "connection failed";
+ else
+@@ -395,9 +395,9 @@ void ceph_con_close(struct ceph_connecti
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ set_bit(CLOSED, &con->state); /* in case there's queued work */
+ clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
+- clear_bit(LOSSYTX, &con->state); /* so we retry next connect */
+- clear_bit(KEEPALIVE_PENDING, &con->state);
+- clear_bit(WRITE_PENDING, &con->state);
++ clear_bit(LOSSYTX, &con->flags); /* so we retry next connect */
++ clear_bit(KEEPALIVE_PENDING, &con->flags);
++ clear_bit(WRITE_PENDING, &con->flags);
+ mutex_lock(&con->mutex);
+ reset_connection(con);
+ con->peer_global_seq = 0;
+@@ -618,7 +618,7 @@ static void prepare_write_message(struct
+ prepare_write_message_footer(con);
+ }
+
+- set_bit(WRITE_PENDING, &con->state);
++ set_bit(WRITE_PENDING, &con->flags);
+ }
+
+ /*
+@@ -639,7 +639,7 @@ static void prepare_write_ack(struct cep
+ &con->out_temp_ack);
+
+ con->out_more = 1; /* more will follow.. eventually.. */
+- set_bit(WRITE_PENDING, &con->state);
++ set_bit(WRITE_PENDING, &con->flags);
+ }
+
+ /*
+@@ -650,7 +650,7 @@ static void prepare_write_keepalive(stru
+ dout("prepare_write_keepalive %p\n", con);
+ con_out_kvec_reset(con);
+ con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
+- set_bit(WRITE_PENDING, &con->state);
++ set_bit(WRITE_PENDING, &con->flags);
+ }
+
+ /*
+@@ -679,7 +679,7 @@ static struct ceph_auth_handshake *get_c
+
+ if (IS_ERR(auth))
+ return auth;
+- if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->state))
++ if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->state))
+ return ERR_PTR(-EAGAIN);
+
+ con->auth_reply_buf = auth->authorizer_reply_buf;
+@@ -699,7 +699,7 @@ static void prepare_write_banner(struct
+ &con->msgr->my_enc_addr);
+
+ con->out_more = 0;
+- set_bit(WRITE_PENDING, &con->state);
++ set_bit(WRITE_PENDING, &con->flags);
+ }
+
+ static int prepare_write_connect(struct ceph_connection *con)
+@@ -749,7 +749,7 @@ static int prepare_write_connect(struct
+ auth->authorizer_buf);
+
+ con->out_more = 0;
+- set_bit(WRITE_PENDING, &con->state);
++ set_bit(WRITE_PENDING, &con->flags);
+
+ return 0;
+ }
+@@ -1496,7 +1496,7 @@ static int process_connect(struct ceph_c
+ le32_to_cpu(con->in_reply.connect_seq));
+
+ if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
+- set_bit(LOSSYTX, &con->state);
++ set_bit(LOSSYTX, &con->flags);
+
+ prepare_read_tag(con);
+ break;
+@@ -1937,14 +1937,14 @@ do_next:
+ prepare_write_ack(con);
+ goto more;
+ }
+- if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) {
++ if (test_and_clear_bit(KEEPALIVE_PENDING, &con->flags)) {
+ prepare_write_keepalive(con);
+ goto more;
+ }
+ }
+
+ /* Nothing to do! */
+- clear_bit(WRITE_PENDING, &con->state);
++ clear_bit(WRITE_PENDING, &con->flags);
+ dout("try_write nothing else to write.\n");
+ ret = 0;
+ out:
+@@ -2110,7 +2110,7 @@ static void con_work(struct work_struct
+
+ mutex_lock(&con->mutex);
+ restart:
+- if (test_and_clear_bit(BACKOFF, &con->state)) {
++ if (test_and_clear_bit(BACKOFF, &con->flags)) {
+ dout("con_work %p backing off\n", con);
+ if (queue_delayed_work(ceph_msgr_wq, &con->work,
+ round_jiffies_relative(con->delay))) {
+@@ -2139,7 +2139,7 @@ restart:
+ con_close_socket(con);
+ }
+
+- if (test_and_clear_bit(SOCK_CLOSED, &con->state))
++ if (test_and_clear_bit(SOCK_CLOSED, &con->flags))
+ goto fault;
+
+ ret = try_read(con);
+@@ -2178,7 +2178,7 @@ static void ceph_fault(struct ceph_conne
+ dout("fault %p state %lu to peer %s\n",
+ con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
+
+- if (test_bit(LOSSYTX, &con->state)) {
++ if (test_bit(LOSSYTX, &con->flags)) {
+ dout("fault on LOSSYTX channel\n");
+ goto out;
+ }
+@@ -2200,9 +2200,9 @@ static void ceph_fault(struct ceph_conne
+ /* If there are no messages queued or keepalive pending, place
+ * the connection in a STANDBY state */
+ if (list_empty(&con->out_queue) &&
+- !test_bit(KEEPALIVE_PENDING, &con->state)) {
++ !test_bit(KEEPALIVE_PENDING, &con->flags)) {
+ dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
+- clear_bit(WRITE_PENDING, &con->state);
++ clear_bit(WRITE_PENDING, &con->flags);
+ set_bit(STANDBY, &con->state);
+ } else {
+ /* retry after a delay. */
+@@ -2226,7 +2226,7 @@ static void ceph_fault(struct ceph_conne
+ * that when con_work restarts we schedule the
+ * delay then.
+ */
+- set_bit(BACKOFF, &con->state);
++ set_bit(BACKOFF, &con->flags);
+ }
+ }
+
+@@ -2282,8 +2282,8 @@ static void clear_standby(struct ceph_co
+ mutex_lock(&con->mutex);
+ dout("clear_standby %p and ++connect_seq\n", con);
+ con->connect_seq++;
+- WARN_ON(test_bit(WRITE_PENDING, &con->state));
+- WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
++ WARN_ON(test_bit(WRITE_PENDING, &con->flags));
++ WARN_ON(test_bit(KEEPALIVE_PENDING, &con->flags));
+ mutex_unlock(&con->mutex);
+ }
+ }
+@@ -2321,7 +2321,7 @@ void ceph_con_send(struct ceph_connectio
+ /* if there wasn't anything waiting to send before, queue
+ * new work */
+ clear_standby(con);
+- if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
++ if (test_and_set_bit(WRITE_PENDING, &con->flags) == 0)
+ queue_con(con);
+ }
+ EXPORT_SYMBOL(ceph_con_send);
+@@ -2388,8 +2388,8 @@ void ceph_con_keepalive(struct ceph_conn
+ {
+ dout("con_keepalive %p\n", con);
+ clear_standby(con);
+- if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
+- test_and_set_bit(WRITE_PENDING, &con->state) == 0)
++ if (test_and_set_bit(KEEPALIVE_PENDING, &con->flags) == 0 &&
++ test_and_set_bit(WRITE_PENDING, &con->flags) == 0)
+ queue_con(con);
+ }
+ EXPORT_SYMBOL(ceph_con_keepalive);
--- /dev/null
+From 1dc8c9af19ebb0486a3d573579358c4a3f918bb0 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Tue, 22 May 2012 22:15:49 -0500
+Subject: libceph: start tracking connection socket state
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit ce2c8903e76e690846a00a0284e4bd9ee954d680)
+
+Start explicitly keeping track of the state of a ceph connection's
+socket, separate from the state of the connection itself. Create
+placeholder functions to encapsulate the state transitions.
+
+      --------
+      | NEW* |  transient initial state
+      --------
+          | con_sock_state_init()
+          v
+      ----------
+      | CLOSED |  initialized, but no socket (and no
+      ----------  TCP connection)
+       ^      \
+       |       \ con_sock_state_connecting()
+       |        ----------------------
+       |                              \
+       + con_sock_state_closed()       \
+       |\                               \
+       | \                               \
+       |  -----------                     \
+       |  | CLOSING |  socket event;       \
+       |  -----------  await close          \
+       |       ^                             |
+       |       |                             |
+       |       + con_sock_state_closing()    |
+       |      / \                            |
+       |     /   ---------------             |
+       |    /                   \            v
+       |   /                    --------------
+       |  /     -----------------| CONNECTING |  socket created, TCP
+       |  |   /                  --------------  connect initiated
+       |  |   | con_sock_state_connected()
+       |  |   v
+      -------------
+      | CONNECTED |  TCP connection established
+      -------------
+
+Make the socket state an atomic variable, reinforcing that it's a
+distinct transition with no possible "intermediate/both" states.
+This is almost certainly overkill at this point, though the
+transitions into CONNECTED and CLOSING state do get called via
+socket callback (the rest of the transitions occur with the
+connection mutex held). We can back out the atomicity later.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h | 8 +++--
+ net/ceph/messenger.c | 64 +++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 70 insertions(+), 2 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -137,14 +137,18 @@ struct ceph_connection {
+ const struct ceph_connection_operations *ops;
+
+ struct ceph_messenger *msgr;
++
++ atomic_t sock_state;
+ struct socket *sock;
++ struct ceph_entity_addr peer_addr; /* peer address */
++ struct ceph_entity_addr peer_addr_for_me;
++
+ unsigned long flags;
+ unsigned long state;
+ const char *error_msg; /* error message, if any */
+
+- struct ceph_entity_addr peer_addr; /* peer address */
+ struct ceph_entity_name peer_name; /* peer name */
+- struct ceph_entity_addr peer_addr_for_me;
++
+ unsigned peer_features;
+ u32 connect_seq; /* identify the most recent connection
+ attempt for this connection, client */
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -29,6 +29,14 @@
+ * the sender.
+ */
+
++/* State values for ceph_connection->sock_state; NEW is assumed to be 0 */
++
++#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */
++#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */
++#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */
++#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */
++#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */
++
+ /* static tag bytes (protocol control messages) */
+ static char tag_msg = CEPH_MSGR_TAG_MSG;
+ static char tag_ack = CEPH_MSGR_TAG_ACK;
+@@ -147,6 +155,55 @@ void ceph_msgr_flush(void)
+ }
+ EXPORT_SYMBOL(ceph_msgr_flush);
+
++/* Connection socket state transition functions */
++
++static void con_sock_state_init(struct ceph_connection *con)
++{
++ int old_state;
++
++ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
++ if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
++ printk("%s: unexpected old state %d\n", __func__, old_state);
++}
++
++static void con_sock_state_connecting(struct ceph_connection *con)
++{
++ int old_state;
++
++ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
++ if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
++ printk("%s: unexpected old state %d\n", __func__, old_state);
++}
++
++static void con_sock_state_connected(struct ceph_connection *con)
++{
++ int old_state;
++
++ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
++ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
++ printk("%s: unexpected old state %d\n", __func__, old_state);
++}
++
++static void con_sock_state_closing(struct ceph_connection *con)
++{
++ int old_state;
++
++ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
++ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
++ old_state != CON_SOCK_STATE_CONNECTED &&
++ old_state != CON_SOCK_STATE_CLOSING))
++ printk("%s: unexpected old state %d\n", __func__, old_state);
++}
++
++static void con_sock_state_closed(struct ceph_connection *con)
++{
++ int old_state;
++
++ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
++ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
++ old_state != CON_SOCK_STATE_CLOSING))
++ printk("%s: unexpected old state %d\n", __func__, old_state);
++}
+
+ /*
+ * socket callback functions
+@@ -203,6 +260,7 @@ static void ceph_sock_state_change(struc
+ dout("%s TCP_CLOSE\n", __func__);
+ case TCP_CLOSE_WAIT:
+ dout("%s TCP_CLOSE_WAIT\n", __func__);
++ con_sock_state_closing(con);
+ if (test_and_set_bit(SOCK_CLOSED, &con->flags) == 0) {
+ if (test_bit(CONNECTING, &con->state))
+ con->error_msg = "connection failed";
+@@ -213,6 +271,7 @@ static void ceph_sock_state_change(struc
+ break;
+ case TCP_ESTABLISHED:
+ dout("%s TCP_ESTABLISHED\n", __func__);
++ con_sock_state_connected(con);
+ queue_con(con);
+ break;
+ default: /* Everything else is uninteresting */
+@@ -277,6 +336,7 @@ static int ceph_tcp_connect(struct ceph_
+ return ret;
+ }
+ con->sock = sock;
++ con_sock_state_connecting(con);
+
+ return 0;
+ }
+@@ -343,6 +403,7 @@ static int con_close_socket(struct ceph_
+ sock_release(con->sock);
+ con->sock = NULL;
+ clear_bit(SOCK_CLOSED, &con->state);
++ con_sock_state_closed(con);
+ return rc;
+ }
+
+@@ -462,6 +523,9 @@ void ceph_con_init(struct ceph_messenger
+ memset(con, 0, sizeof(*con));
+ atomic_set(&con->nref, 1);
+ con->msgr = msgr;
++
++ con_sock_state_init(con);
++
+ mutex_init(&con->mutex);
+ INIT_LIST_HEAD(&con->out_queue);
+ INIT_LIST_HEAD(&con->out_sent);
--- /dev/null
+From 3cfa1d37bc05179577c43f4ed6b2689556729813 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Sat, 26 May 2012 23:26:43 -0500
+Subject: libceph: provide osd number when creating osd
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit e10006f807ffc4d5b1d861305d18d9e8145891ca)
+
+Pass the osd number to the create_osd() routine, and move the
+initialization of fields that depend on it therein.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/osd_client.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -624,7 +624,7 @@ static void osd_reset(struct ceph_connec
+ /*
+ * Track open sessions with osds.
+ */
+-static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
++static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
+ {
+ struct ceph_osd *osd;
+
+@@ -634,6 +634,7 @@ static struct ceph_osd *create_osd(struc
+
+ atomic_set(&osd->o_ref, 1);
+ osd->o_osdc = osdc;
++ osd->o_osd = onum;
+ INIT_LIST_HEAD(&osd->o_requests);
+ INIT_LIST_HEAD(&osd->o_linger_requests);
+ INIT_LIST_HEAD(&osd->o_osd_lru);
+@@ -643,6 +644,7 @@ static struct ceph_osd *create_osd(struc
+ osd->o_con.private = osd;
+ osd->o_con.ops = &osd_con_ops;
+ osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
++ osd->o_con.peer_name.num = cpu_to_le64(onum);
+
+ INIT_LIST_HEAD(&osd->o_keepalive_item);
+ return osd;
+@@ -998,15 +1000,13 @@ static int __map_request(struct ceph_osd
+ req->r_osd = __lookup_osd(osdc, o);
+ if (!req->r_osd && o >= 0) {
+ err = -ENOMEM;
+- req->r_osd = create_osd(osdc);
++ req->r_osd = create_osd(osdc, o);
+ if (!req->r_osd) {
+ list_move(&req->r_req_lru_item, &osdc->req_notarget);
+ goto out;
+ }
+
+ dout("map_request osd %p is osd%d\n", req->r_osd, o);
+- req->r_osd->o_osd = o;
+- req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
+ __insert_osd(osdc, req->r_osd);
+
+ ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]);
--- /dev/null
+From 2a162524ff9c9635cf040179c1f587b08fe5efa7 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Tue, 29 May 2012 11:04:58 -0500
+Subject: libceph: set CLOSED state bit in con_init
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit a5988c490ef66cb04ea2f610681949b25c773b3c)
+
+Once a connection is fully initialized, it is really in a CLOSED
+state, so make that explicit by setting the bit in its state field.
+
+It is possible for a connection in NEGOTIATING state to get a
+failure, leading to ceph_fault() and ultimately ceph_con_close().
+Clear that bit if it is set in that case, to reflect that the
+connection truly is closed and is no longer participating in a
+connect sequence.
+
+Issue a warning if ceph_con_open() is called on a connection that
+is not in CLOSED state.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -454,11 +454,14 @@ void ceph_con_close(struct ceph_connecti
+ {
+ dout("con_close %p peer %s\n", con,
+ ceph_pr_addr(&con->peer_addr.in_addr));
+- set_bit(CLOSED, &con->state); /* in case there's queued work */
++ clear_bit(NEGOTIATING, &con->state);
+ clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
++ set_bit(CLOSED, &con->state);
++
+ clear_bit(LOSSYTX, &con->flags); /* so we retry next connect */
+ clear_bit(KEEPALIVE_PENDING, &con->flags);
+ clear_bit(WRITE_PENDING, &con->flags);
++
+ mutex_lock(&con->mutex);
+ reset_connection(con);
+ con->peer_global_seq = 0;
+@@ -475,7 +478,8 @@ void ceph_con_open(struct ceph_connectio
+ {
+ dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
+ set_bit(OPENING, &con->state);
+- clear_bit(CLOSED, &con->state);
++ WARN_ON(!test_and_clear_bit(CLOSED, &con->state));
++
+ memcpy(&con->peer_addr, addr, sizeof(*addr));
+ con->delay = 0; /* reset backoff memory */
+ queue_con(con);
+@@ -530,6 +534,8 @@ void ceph_con_init(struct ceph_messenger
+ INIT_LIST_HEAD(&con->out_queue);
+ INIT_LIST_HEAD(&con->out_sent);
+ INIT_DELAYED_WORK(&con->work, con_work);
++
++ set_bit(CLOSED, &con->state);
+ }
+ EXPORT_SYMBOL(ceph_con_init);
+
+@@ -1937,14 +1943,15 @@ more:
+
+ /* open the socket first? */
+ if (con->sock == NULL) {
++ clear_bit(NEGOTIATING, &con->state);
++ set_bit(CONNECTING, &con->state);
++
+ con_out_kvec_reset(con);
+ prepare_write_banner(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ goto out;
+ prepare_read_banner(con);
+- set_bit(CONNECTING, &con->state);
+- clear_bit(NEGOTIATING, &con->state);
+
+ BUG_ON(con->in_msg);
+ con->in_tag = CEPH_MSGR_TAG_READY;
--- /dev/null
+From ac495165ac2a91db87b344c4e769e3eccac72f89 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Sat, 26 May 2012 23:26:43 -0500
+Subject: libceph: embed ceph connection structure in mon_client
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 67130934fb579fdf0f2f6d745960264378b57dc8)
+
+A monitor client has a pointer to a ceph connection structure in it.
+This is the only one of the three ceph client types that does it this
+way; the OSD and MDS clients embed the connection into their main
+structures. There is always exactly one ceph connection for a
+monitor client, so there is no need to allocate it separate from the
+monitor client structure.
+
+So switch the ceph_mon_client structure to embed its
+ceph_connection structure.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/mon_client.h | 2 -
+ net/ceph/mon_client.c | 47 +++++++++++++++++-----------------------
+ 2 files changed, 21 insertions(+), 28 deletions(-)
+
+--- a/include/linux/ceph/mon_client.h
++++ b/include/linux/ceph/mon_client.h
+@@ -70,7 +70,7 @@ struct ceph_mon_client {
+ bool hunting;
+ int cur_mon; /* last monitor i contacted */
+ unsigned long sub_sent, sub_renew_after;
+- struct ceph_connection *con;
++ struct ceph_connection con;
+ bool have_fsid;
+
+ /* pending generic requests */
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -106,9 +106,9 @@ static void __send_prepared_auth_request
+ monc->pending_auth = 1;
+ monc->m_auth->front.iov_len = len;
+ monc->m_auth->hdr.front_len = cpu_to_le32(len);
+- ceph_con_revoke(monc->con, monc->m_auth);
++ ceph_con_revoke(&monc->con, monc->m_auth);
+ ceph_msg_get(monc->m_auth); /* keep our ref */
+- ceph_con_send(monc->con, monc->m_auth);
++ ceph_con_send(&monc->con, monc->m_auth);
+ }
+
+ /*
+@@ -117,8 +117,8 @@ static void __send_prepared_auth_request
+ static void __close_session(struct ceph_mon_client *monc)
+ {
+ dout("__close_session closing mon%d\n", monc->cur_mon);
+- ceph_con_revoke(monc->con, monc->m_auth);
+- ceph_con_close(monc->con);
++ ceph_con_revoke(&monc->con, monc->m_auth);
++ ceph_con_close(&monc->con);
+ monc->cur_mon = -1;
+ monc->pending_auth = 0;
+ ceph_auth_reset(monc->auth);
+@@ -142,9 +142,9 @@ static int __open_session(struct ceph_mo
+ monc->want_next_osdmap = !!monc->want_next_osdmap;
+
+ dout("open_session mon%d opening\n", monc->cur_mon);
+- monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON;
+- monc->con->peer_name.num = cpu_to_le64(monc->cur_mon);
+- ceph_con_open(monc->con,
++ monc->con.peer_name.type = CEPH_ENTITY_TYPE_MON;
++ monc->con.peer_name.num = cpu_to_le64(monc->cur_mon);
++ ceph_con_open(&monc->con,
+ &monc->monmap->mon_inst[monc->cur_mon].addr);
+
+ /* initiatiate authentication handshake */
+@@ -226,8 +226,8 @@ static void __send_subscribe(struct ceph
+
+ msg->front.iov_len = p - msg->front.iov_base;
+ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+- ceph_con_revoke(monc->con, msg);
+- ceph_con_send(monc->con, ceph_msg_get(msg));
++ ceph_con_revoke(&monc->con, msg);
++ ceph_con_send(&monc->con, ceph_msg_get(msg));
+
+ monc->sub_sent = jiffies | 1; /* never 0 */
+ }
+@@ -247,7 +247,7 @@ static void handle_subscribe_ack(struct
+ if (monc->hunting) {
+ pr_info("mon%d %s session established\n",
+ monc->cur_mon,
+- ceph_pr_addr(&monc->con->peer_addr.in_addr));
++ ceph_pr_addr(&monc->con.peer_addr.in_addr));
+ monc->hunting = false;
+ }
+ dout("handle_subscribe_ack after %d seconds\n", seconds);
+@@ -461,7 +461,7 @@ static int do_generic_request(struct cep
+ req->request->hdr.tid = cpu_to_le64(req->tid);
+ __insert_generic_request(monc, req);
+ monc->num_generic_requests++;
+- ceph_con_send(monc->con, ceph_msg_get(req->request));
++ ceph_con_send(&monc->con, ceph_msg_get(req->request));
+ mutex_unlock(&monc->mutex);
+
+ err = wait_for_completion_interruptible(&req->completion);
+@@ -684,8 +684,8 @@ static void __resend_generic_request(str
+
+ for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
+ req = rb_entry(p, struct ceph_mon_generic_request, node);
+- ceph_con_revoke(monc->con, req->request);
+- ceph_con_send(monc->con, ceph_msg_get(req->request));
++ ceph_con_revoke(&monc->con, req->request);
++ ceph_con_send(&monc->con, ceph_msg_get(req->request));
+ }
+ }
+
+@@ -705,7 +705,7 @@ static void delayed_work(struct work_str
+ __close_session(monc);
+ __open_session(monc); /* continue hunting */
+ } else {
+- ceph_con_keepalive(monc->con);
++ ceph_con_keepalive(&monc->con);
+
+ __validate_auth(monc);
+
+@@ -760,19 +760,16 @@ int ceph_monc_init(struct ceph_mon_clien
+ goto out;
+
+ /* connection */
+- monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
+- if (!monc->con)
+- goto out_monmap;
+- ceph_con_init(&monc->client->msgr, monc->con);
+- monc->con->private = monc;
+- monc->con->ops = &mon_con_ops;
++ ceph_con_init(&monc->client->msgr, &monc->con);
++ monc->con.private = monc;
++ monc->con.ops = &mon_con_ops;
+
+ /* authentication */
+ monc->auth = ceph_auth_init(cl->options->name,
+ cl->options->key);
+ if (IS_ERR(monc->auth)) {
+ err = PTR_ERR(monc->auth);
+- goto out_con;
++ goto out_monmap;
+ }
+ monc->auth->want_keys =
+ CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON |
+@@ -824,8 +821,6 @@ out_subscribe_ack:
+ ceph_msg_put(monc->m_subscribe_ack);
+ out_auth:
+ ceph_auth_destroy(monc->auth);
+-out_con:
+- monc->con->ops->put(monc->con);
+ out_monmap:
+ kfree(monc->monmap);
+ out:
+@@ -841,9 +836,7 @@ void ceph_monc_stop(struct ceph_mon_clie
+ mutex_lock(&monc->mutex);
+ __close_session(monc);
+
+- monc->con->private = NULL;
+- monc->con->ops->put(monc->con);
+- monc->con = NULL;
++ monc->con.private = NULL;
+
+ mutex_unlock(&monc->mutex);
+
+@@ -1029,7 +1022,7 @@ static void mon_fault(struct ceph_connec
+ if (!monc->hunting)
+ pr_info("mon%d %s session lost, "
+ "hunting for new mon\n", monc->cur_mon,
+- ceph_pr_addr(&monc->con->peer_addr.in_addr));
++ ceph_pr_addr(&monc->con.peer_addr.in_addr));
+
+ __close_session(monc);
+ if (!monc->hunting) {
--- /dev/null
+From 646a893f1d8346dc1b2826c684de99e5df37d5ed Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Thu, 31 May 2012 20:27:50 -0700
+Subject: libceph: drop connection refcounting for mon_client
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit ec87ef4309d33bd9c87a53bb5152a86ae7a65f25)
+
+All references to the embedded ceph_connection come from the msgr
+workqueue, which is drained prior to mon_client destruction. That
+means we can ignore con refcounting entirely.
+
+Signed-off-by: Sage Weil <sage@newdream.net>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/mon_client.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -1037,9 +1037,23 @@ out:
+ mutex_unlock(&monc->mutex);
+ }
+
++/*
++ * We can ignore refcounting on the connection struct, as all references
++ * will come from the messenger workqueue, which is drained prior to
++ * mon_client destruction.
++ */
++static struct ceph_connection *con_get(struct ceph_connection *con)
++{
++ return con;
++}
++
++static void con_put(struct ceph_connection *con)
++{
++}
++
+ static const struct ceph_connection_operations mon_con_ops = {
+- .get = ceph_con_get,
+- .put = ceph_con_put,
++ .get = con_get,
++ .put = con_put,
+ .dispatch = dispatch,
+ .fault = mon_fault,
+ .alloc_msg = mon_alloc_msg,
--- /dev/null
+From efea1a38cd969f4b52ec5cc468d8143bb496efc2 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Sat, 26 May 2012 23:26:43 -0500
+Subject: libceph: init monitor connection when opening
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 20581c1faf7b15ae1f8b80c0ec757877b0b53151)
+
+Hold off initializing a monitor client's connection until just
+before it gets opened for use.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/mon_client.c | 13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -119,6 +119,7 @@ static void __close_session(struct ceph_
+ dout("__close_session closing mon%d\n", monc->cur_mon);
+ ceph_con_revoke(&monc->con, monc->m_auth);
+ ceph_con_close(&monc->con);
++ monc->con.private = NULL;
+ monc->cur_mon = -1;
+ monc->pending_auth = 0;
+ ceph_auth_reset(monc->auth);
+@@ -141,9 +142,13 @@ static int __open_session(struct ceph_mo
+ monc->sub_renew_after = jiffies; /* i.e., expired */
+ monc->want_next_osdmap = !!monc->want_next_osdmap;
+
+- dout("open_session mon%d opening\n", monc->cur_mon);
++ ceph_con_init(&monc->client->msgr, &monc->con);
++ monc->con.private = monc;
++ monc->con.ops = &mon_con_ops;
+ monc->con.peer_name.type = CEPH_ENTITY_TYPE_MON;
+ monc->con.peer_name.num = cpu_to_le64(monc->cur_mon);
++
++ dout("open_session mon%d opening\n", monc->cur_mon);
+ ceph_con_open(&monc->con,
+ &monc->monmap->mon_inst[monc->cur_mon].addr);
+
+@@ -760,10 +765,6 @@ int ceph_monc_init(struct ceph_mon_clien
+ goto out;
+
+ /* connection */
+- ceph_con_init(&monc->client->msgr, &monc->con);
+- monc->con.private = monc;
+- monc->con.ops = &mon_con_ops;
+-
+ /* authentication */
+ monc->auth = ceph_auth_init(cl->options->name,
+ cl->options->key);
+@@ -836,8 +837,6 @@ void ceph_monc_stop(struct ceph_mon_clie
+ mutex_lock(&monc->mutex);
+ __close_session(monc);
+
+- monc->con.private = NULL;
+-
+ mutex_unlock(&monc->mutex);
+
+ /*
--- /dev/null
+From 449c48298a24fb8a48f6e0574f098ddfe6c81325 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Sat, 26 May 2012 23:26:43 -0500
+Subject: libceph: fully initialize connection in con_init()
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 1bfd89f4e6e1adc6a782d94aa5d4c53be1e404d7)
+
+Move the initialization of a ceph connection's private pointer,
+operations vector pointer, and peer name information into
+ceph_con_init(). Rearrange the arguments so the connection pointer
+is first. Hide the byte-swapping of the peer entity number inside
+ceph_con_init().
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/mds_client.c | 7 ++-----
+ include/linux/ceph/messenger.h | 6 ++++--
+ net/ceph/messenger.c | 9 ++++++++-
+ net/ceph/mon_client.c | 8 +++-----
+ net/ceph/osd_client.c | 7 ++-----
+ 5 files changed, 19 insertions(+), 18 deletions(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -394,11 +394,8 @@ static struct ceph_mds_session *register
+ s->s_seq = 0;
+ mutex_init(&s->s_mutex);
+
+- ceph_con_init(&mdsc->fsc->client->msgr, &s->s_con);
+- s->s_con.private = s;
+- s->s_con.ops = &mds_con_ops;
+- s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
+- s->s_con.peer_name.num = cpu_to_le64(mds);
++ ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr,
++ CEPH_ENTITY_TYPE_MDS, mds);
+
+ spin_lock_init(&s->s_gen_ttl_lock);
+ s->s_cap_gen = 0;
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -219,8 +219,10 @@ extern void ceph_messenger_init(struct c
+ u32 required_features,
+ bool nocrc);
+
+-extern void ceph_con_init(struct ceph_messenger *msgr,
+- struct ceph_connection *con);
++extern void ceph_con_init(struct ceph_connection *con, void *private,
++ const struct ceph_connection_operations *ops,
++ struct ceph_messenger *msgr, __u8 entity_type,
++ __u64 entity_num);
+ extern void ceph_con_open(struct ceph_connection *con,
+ struct ceph_entity_addr *addr);
+ extern bool ceph_con_opened(struct ceph_connection *con);
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -521,15 +521,22 @@ void ceph_con_put(struct ceph_connection
+ /*
+ * initialize a new connection.
+ */
+-void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
++void ceph_con_init(struct ceph_connection *con, void *private,
++ const struct ceph_connection_operations *ops,
++ struct ceph_messenger *msgr, __u8 entity_type, __u64 entity_num)
+ {
+ dout("con_init %p\n", con);
+ memset(con, 0, sizeof(*con));
++ con->private = private;
++ con->ops = ops;
+ atomic_set(&con->nref, 1);
+ con->msgr = msgr;
+
+ con_sock_state_init(con);
+
++ con->peer_name.type = (__u8) entity_type;
++ con->peer_name.num = cpu_to_le64(entity_num);
++
+ mutex_init(&con->mutex);
+ INIT_LIST_HEAD(&con->out_queue);
+ INIT_LIST_HEAD(&con->out_sent);
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -142,11 +142,9 @@ static int __open_session(struct ceph_mo
+ monc->sub_renew_after = jiffies; /* i.e., expired */
+ monc->want_next_osdmap = !!monc->want_next_osdmap;
+
+- ceph_con_init(&monc->client->msgr, &monc->con);
+- monc->con.private = monc;
+- monc->con.ops = &mon_con_ops;
+- monc->con.peer_name.type = CEPH_ENTITY_TYPE_MON;
+- monc->con.peer_name.num = cpu_to_le64(monc->cur_mon);
++ ceph_con_init(&monc->con, monc, &mon_con_ops,
++ &monc->client->msgr,
++ CEPH_ENTITY_TYPE_MON, monc->cur_mon);
+
+ dout("open_session mon%d opening\n", monc->cur_mon);
+ ceph_con_open(&monc->con,
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -640,11 +640,8 @@ static struct ceph_osd *create_osd(struc
+ INIT_LIST_HEAD(&osd->o_osd_lru);
+ osd->o_incarnation = 1;
+
+- ceph_con_init(&osdc->client->msgr, &osd->o_con);
+- osd->o_con.private = osd;
+- osd->o_con.ops = &osd_con_ops;
+- osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
+- osd->o_con.peer_name.num = cpu_to_le64(onum);
++ ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr,
++ CEPH_ENTITY_TYPE_OSD, onum);
+
+ INIT_LIST_HEAD(&osd->o_keepalive_item);
+ return osd;
--- /dev/null
+From 3b865e1b62c6088a110cad70ea725f65775c0c5e Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 4 Jun 2012 14:43:32 -0500
+Subject: libceph: tweak ceph_alloc_msg()
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 1c20f2d26795803fc4f5155fe4fca5717a5944b6)
+
+The function ceph_alloc_msg() is only used to allocate a message
+that will be assigned to a connection's in_msg pointer. Rename the
+function so this implied usage is more clear.
+
+In addition, make that assignment inside the function (again, since
+that's precisely what it's intended to be used for). This allows us
+to return what is now provided via the passed-in address of a "skip"
+variable. The return type is now Boolean to be explicit that there
+are only two possible outcomes.
+
+Make sure the result of an ->alloc_msg method call always sets the
+value of *skip properly.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 61 +++++++++++++++++++++++++++-----------------------
+ net/ceph/mon_client.c | 3 ++
+ net/ceph/osd_client.c | 1
+ 3 files changed, 38 insertions(+), 27 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1659,9 +1659,8 @@ static int read_partial_message_section(
+ return 1;
+ }
+
+-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
+- struct ceph_msg_header *hdr,
+- int *skip);
++static bool ceph_con_in_msg_alloc(struct ceph_connection *con,
++ struct ceph_msg_header *hdr);
+
+
+ static int read_partial_message_pages(struct ceph_connection *con,
+@@ -1744,7 +1743,6 @@ static int read_partial_message(struct c
+ int ret;
+ unsigned front_len, middle_len, data_len;
+ bool do_datacrc = !con->msgr->nocrc;
+- int skip;
+ u64 seq;
+ u32 crc;
+
+@@ -1797,9 +1795,7 @@ static int read_partial_message(struct c
+ if (!con->in_msg) {
+ dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
+ con->in_hdr.front_len, con->in_hdr.data_len);
+- skip = 0;
+- con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
+- if (skip) {
++ if (ceph_con_in_msg_alloc(con, &con->in_hdr)) {
+ /* skip this message */
+ dout("alloc_msg said skip message\n");
+ BUG_ON(con->in_msg);
+@@ -2581,46 +2577,57 @@ static int ceph_alloc_middle(struct ceph
+ }
+
+ /*
+- * Generic message allocator, for incoming messages.
++ * Allocate a message for receiving an incoming message on a
++ * connection, and save the result in con->in_msg. Uses the
++ * connection's private alloc_msg op if available.
++ *
++ * Returns true if the message should be skipped, false otherwise.
++ * If true is returned (skip message), con->in_msg will be NULL.
++ * If false is returned, con->in_msg will contain a pointer to the
++ * newly-allocated message, or NULL in case of memory exhaustion.
+ */
+-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
+- struct ceph_msg_header *hdr,
+- int *skip)
++static bool ceph_con_in_msg_alloc(struct ceph_connection *con,
++ struct ceph_msg_header *hdr)
+ {
+ int type = le16_to_cpu(hdr->type);
+ int front_len = le32_to_cpu(hdr->front_len);
+ int middle_len = le32_to_cpu(hdr->middle_len);
+- struct ceph_msg *msg = NULL;
+ int ret;
+
++ BUG_ON(con->in_msg != NULL);
++
+ if (con->ops->alloc_msg) {
++ int skip = 0;
++
+ mutex_unlock(&con->mutex);
+- msg = con->ops->alloc_msg(con, hdr, skip);
++ con->in_msg = con->ops->alloc_msg(con, hdr, &skip);
+ mutex_lock(&con->mutex);
+- if (!msg || *skip)
+- return NULL;
++ if (skip)
++ con->in_msg = NULL;
++
++ if (!con->in_msg)
++ return skip != 0;
+ }
+- if (!msg) {
+- *skip = 0;
+- msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
+- if (!msg) {
++ if (!con->in_msg) {
++ con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
++ if (!con->in_msg) {
+ pr_err("unable to allocate msg type %d len %d\n",
+ type, front_len);
+- return NULL;
++ return false;
+ }
+- msg->page_alignment = le16_to_cpu(hdr->data_off);
++ con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
+ }
+- memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
++ memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
+
+- if (middle_len && !msg->middle) {
+- ret = ceph_alloc_middle(con, msg);
++ if (middle_len && !con->in_msg->middle) {
++ ret = ceph_alloc_middle(con, con->in_msg);
+ if (ret < 0) {
+- ceph_msg_put(msg);
+- return NULL;
++ ceph_msg_put(con->in_msg);
++ con->in_msg = NULL;
+ }
+ }
+
+- return msg;
++ return false;
+ }
+
+
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -442,6 +442,7 @@ static struct ceph_msg *get_generic_repl
+ m = NULL;
+ } else {
+ dout("get_generic_reply %lld got %p\n", tid, req->reply);
++ *skip = 0;
+ m = ceph_msg_get(req->reply);
+ /*
+ * we don't need to track the connection reading into
+@@ -990,6 +991,8 @@ static struct ceph_msg *mon_alloc_msg(st
+ case CEPH_MSG_MDS_MAP:
+ case CEPH_MSG_OSD_MAP:
+ m = ceph_msg_new(type, front_len, GFP_NOFS, false);
++ if (!m)
++ return NULL; /* ENOMEM--return skip == 0 */
+ break;
+ }
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -2077,6 +2077,7 @@ static struct ceph_msg *alloc_msg(struct
+ int type = le16_to_cpu(hdr->type);
+ int front = le32_to_cpu(hdr->front_len);
+
++ *skip = 0;
+ switch (type) {
+ case CEPH_MSG_OSD_MAP:
+ case CEPH_MSG_WATCH_NOTIFY:
--- /dev/null
+From 48588cda937cf200d7cf89fbb74e59449a389de8 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Fri, 1 Jun 2012 14:56:43 -0500
+Subject: libceph: have messages point to their connection
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 38941f8031bf042dba3ced6394ba3a3b16c244ea)
+
+When a ceph message is queued for sending it is placed on a list of
+pending messages (ceph_connection->out_queue). When they are
+actually sent over the wire, they are moved from that list to
+another (ceph_connection->out_sent). When acknowledgement for the
+message is received, it is removed from the sent messages list.
+
+During that entire time the message is "in the possession" of a
+single ceph connection. Keep track of that connection in the
+message. This will be used in the next patch (and is a helpful
+bit of information for debugging anyway).
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h | 3 +++
+ net/ceph/messenger.c | 27 +++++++++++++++++++++++++--
+ 2 files changed, 28 insertions(+), 2 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -77,7 +77,10 @@ struct ceph_msg {
+ unsigned nr_pages; /* size of page array */
+ unsigned page_alignment; /* io offset in first page */
+ struct ceph_pagelist *pagelist; /* instead of pages */
++
++ struct ceph_connection *con;
+ struct list_head list_head;
++
+ struct kref kref;
+ struct bio *bio; /* instead of pages/pagelist */
+ struct bio *bio_iter; /* bio iterator */
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -414,6 +414,9 @@ static int con_close_socket(struct ceph_
+ static void ceph_msg_remove(struct ceph_msg *msg)
+ {
+ list_del_init(&msg->list_head);
++ BUG_ON(msg->con == NULL);
++ msg->con = NULL;
++
+ ceph_msg_put(msg);
+ }
+ static void ceph_msg_remove_list(struct list_head *head)
+@@ -433,6 +436,8 @@ static void reset_connection(struct ceph
+ ceph_msg_remove_list(&con->out_sent);
+
+ if (con->in_msg) {
++ BUG_ON(con->in_msg->con != con);
++ con->in_msg->con = NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
+ }
+@@ -625,8 +630,10 @@ static void prepare_write_message(struct
+ &con->out_temp_ack);
+ }
+
++ BUG_ON(list_empty(&con->out_queue));
+ m = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
+ con->out_msg = m;
++ BUG_ON(m->con != con);
+
+ /* put message on sent list */
+ ceph_msg_get(m);
+@@ -1810,6 +1817,8 @@ static int read_partial_message(struct c
+ "error allocating memory for incoming message";
+ return -ENOMEM;
+ }
++
++ BUG_ON(con->in_msg->con != con);
+ m = con->in_msg;
+ m->front.iov_len = 0; /* haven't read it yet */
+ if (m->middle)
+@@ -1905,6 +1914,8 @@ static void process_message(struct ceph_
+ {
+ struct ceph_msg *msg;
+
++ BUG_ON(con->in_msg->con != con);
++ con->in_msg->con = NULL;
+ msg = con->in_msg;
+ con->in_msg = NULL;
+
+@@ -2264,6 +2275,8 @@ static void ceph_fault(struct ceph_conne
+ con_close_socket(con);
+
+ if (con->in_msg) {
++ BUG_ON(con->in_msg->con != con);
++ con->in_msg->con = NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
+ }
+@@ -2382,6 +2395,8 @@ void ceph_con_send(struct ceph_connectio
+
+ /* queue */
+ mutex_lock(&con->mutex);
++ BUG_ON(msg->con != NULL);
++ msg->con = con;
+ BUG_ON(!list_empty(&msg->list_head));
+ list_add_tail(&msg->list_head, &con->out_queue);
+ dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
+@@ -2407,13 +2422,16 @@ void ceph_con_revoke(struct ceph_connect
+ {
+ mutex_lock(&con->mutex);
+ if (!list_empty(&msg->list_head)) {
+- dout("con_revoke %p msg %p - was on queue\n", con, msg);
++ dout("%s %p msg %p - was on queue\n", __func__, con, msg);
+ list_del_init(&msg->list_head);
++ BUG_ON(msg->con == NULL);
++ msg->con = NULL;
++
+ ceph_msg_put(msg);
+ msg->hdr.seq = 0;
+ }
+ if (con->out_msg == msg) {
+- dout("con_revoke %p msg %p - was sending\n", con, msg);
++ dout("%s %p msg %p - was sending\n", __func__, con, msg);
+ con->out_msg = NULL;
+ if (con->out_kvec_is_msg) {
+ con->out_skip = con->out_kvec_bytes;
+@@ -2482,6 +2500,8 @@ struct ceph_msg *ceph_msg_new(int type,
+ if (m == NULL)
+ goto out;
+ kref_init(&m->kref);
++
++ m->con = NULL;
+ INIT_LIST_HEAD(&m->list_head);
+
+ m->hdr.tid = 0;
+@@ -2602,6 +2622,8 @@ static bool ceph_con_in_msg_alloc(struct
+ mutex_unlock(&con->mutex);
+ con->in_msg = con->ops->alloc_msg(con, hdr, &skip);
+ mutex_lock(&con->mutex);
++ if (con->in_msg)
++ con->in_msg->con = con;
+ if (skip)
+ con->in_msg = NULL;
+
+@@ -2615,6 +2637,7 @@ static bool ceph_con_in_msg_alloc(struct
+ type, front_len);
+ return false;
+ }
++ con->in_msg->con = con;
+ con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
+ }
+ memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
--- /dev/null
+From 898bf051066aaecc79487425b6614fb8e0efca4a Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 4 Jun 2012 14:43:33 -0500
+Subject: libceph: have messages take a connection reference
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 92ce034b5a740046cc643a21ea21eaad589e0043)
+
+There are essentially two types of ceph messages: incoming and
+outgoing. Outgoing messages are always allocated via ceph_msg_new(),
+and at the time of their allocation they are not associated with any
+particular connection. Incoming messages are always allocated via
+ceph_con_in_msg_alloc(), and they are initially associated with the
+connection from which incoming data will be placed into the message.
+
+When an outgoing message gets sent, it becomes associated with a
+connection and remains that way until the message is successfully
+sent. The association of an incoming message goes away at the point
+it is sent to an upper layer via a con->ops->dispatch method.
+
+This patch implements reference counting for all ceph messages, such
+that every message holds a reference (and a pointer) to a connection
+if and only if it is associated with that connection (as described
+above).
+
+For background, here is an explanation of the ceph message
+lifecycle, emphasizing when an association exists between a message
+and a connection.
+
+Outgoing Messages
+An outgoing message is "owned" by its allocator, from the time it is
+allocated in ceph_msg_new() up to the point it gets queued for
+sending in ceph_con_send(). Prior to that point the message's
+msg->con pointer is null; at the point it is queued for sending its
+msg->con pointer is assigned to refer to the connection. At that
+time the message is inserted into a connection's out_queue list.
+
+When a message on the out_queue list has been sent to the socket
+layer to be put on the wire, it is transferred out of that list and
+into the connection's out_sent list. At that point it is still owned
+by the connection, and will remain so until an acknowledgement is
+received from the recipient that indicates the message was
+successfully transferred. When such an acknowledgement is received
+(in process_ack()), the message is removed from its list (in
+ceph_msg_remove()), at which point it is no longer associated with
+the connection.
+
+So basically, any time a message is on one of a connection's lists,
+it is associated with that connection. Reference counting outgoing
+messages can thus be done at the points a message is added to the
+out_queue (in ceph_con_send()) and the point it is removed from
+either of its two lists (in ceph_msg_remove())--at which point its
+connection pointer becomes null.
+
+Incoming Messages
+When an incoming message on a connection is getting read (in
+read_partial_message()) and there is no message in con->in_msg,
+a new one is allocated using ceph_con_in_msg_alloc(). At that
+point the message is associated with the connection. Once that
+message has been completely and successfully read, it is passed to
+upper layer code using the connection's con->ops->dispatch method.
+At that point the association between the message and the connection
+no longer exists.
+
+Reference counting of connections for incoming messages can be done
+by taking a reference to the connection when the message gets
+allocated, and releasing that reference when it gets handed off
+using the dispatch method.
+
+We should never fail to get a connection reference for a
+message--since the caller should already hold one.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 24 ++++++++++++++++++------
+ 1 file changed, 18 insertions(+), 6 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -415,6 +415,7 @@ static void ceph_msg_remove(struct ceph_
+ {
+ list_del_init(&msg->list_head);
+ BUG_ON(msg->con == NULL);
++ ceph_con_put(msg->con);
+ msg->con = NULL;
+
+ ceph_msg_put(msg);
+@@ -440,6 +441,7 @@ static void reset_connection(struct ceph
+ con->in_msg->con = NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
++ ceph_con_put(con);
+ }
+
+ con->connect_seq = 0;
+@@ -1918,6 +1920,7 @@ static void process_message(struct ceph_
+ con->in_msg->con = NULL;
+ msg = con->in_msg;
+ con->in_msg = NULL;
++ ceph_con_put(con);
+
+ /* if first message, set peer_name */
+ if (con->peer_name.type == 0)
+@@ -2279,6 +2282,7 @@ static void ceph_fault(struct ceph_conne
+ con->in_msg->con = NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
++ ceph_con_put(con);
+ }
+
+ /* Requeue anything that hasn't been acked */
+@@ -2395,8 +2399,11 @@ void ceph_con_send(struct ceph_connectio
+
+ /* queue */
+ mutex_lock(&con->mutex);
++
+ BUG_ON(msg->con != NULL);
+- msg->con = con;
++ msg->con = ceph_con_get(con);
++ BUG_ON(msg->con == NULL);
++
+ BUG_ON(!list_empty(&msg->list_head));
+ list_add_tail(&msg->list_head, &con->out_queue);
+ dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
+@@ -2425,10 +2432,11 @@ void ceph_con_revoke(struct ceph_connect
+ dout("%s %p msg %p - was on queue\n", __func__, con, msg);
+ list_del_init(&msg->list_head);
+ BUG_ON(msg->con == NULL);
++ ceph_con_put(msg->con);
+ msg->con = NULL;
++ msg->hdr.seq = 0;
+
+ ceph_msg_put(msg);
+- msg->hdr.seq = 0;
+ }
+ if (con->out_msg == msg) {
+ dout("%s %p msg %p - was sending\n", __func__, con, msg);
+@@ -2437,8 +2445,9 @@ void ceph_con_revoke(struct ceph_connect
+ con->out_skip = con->out_kvec_bytes;
+ con->out_kvec_is_msg = false;
+ }
+- ceph_msg_put(msg);
+ msg->hdr.seq = 0;
++
++ ceph_msg_put(msg);
+ }
+ mutex_unlock(&con->mutex);
+ }
+@@ -2622,8 +2631,10 @@ static bool ceph_con_in_msg_alloc(struct
+ mutex_unlock(&con->mutex);
+ con->in_msg = con->ops->alloc_msg(con, hdr, &skip);
+ mutex_lock(&con->mutex);
+- if (con->in_msg)
+- con->in_msg->con = con;
++ if (con->in_msg) {
++ con->in_msg->con = ceph_con_get(con);
++ BUG_ON(con->in_msg->con == NULL);
++ }
+ if (skip)
+ con->in_msg = NULL;
+
+@@ -2637,7 +2648,8 @@ static bool ceph_con_in_msg_alloc(struct
+ type, front_len);
+ return false;
+ }
+- con->in_msg->con = con;
++ con->in_msg->con = ceph_con_get(con);
++ BUG_ON(con->in_msg->con == NULL);
+ con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
+ }
+ memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
--- /dev/null
+From 0ca876aefb93f1bda1b194af80ed90def4a21768 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Fri, 1 Jun 2012 14:56:43 -0500
+Subject: libceph: make ceph_con_revoke() a msg operation
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 6740a845b2543cc46e1902ba21bac743fbadd0dc)
+
+ceph_con_revoke() is passed both a message and a ceph connection.
+Now that any message associated with a connection holds a pointer
+to that connection, there's no need to provide the connection when
+revoking a message.
+
+This has the added benefit of precluding the possibility of
+providing the wrong connection pointer. If the message's connection
+pointer is null, it is not being tracked by any connection, so
+revoking it is a no-op. This is supported as a convenience for
+upper layers, so they can revoke a message that is not actually
+"in flight."
+
+Rename the function ceph_msg_revoke() to reflect that it is really
+an operation on a message, not a connection.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h | 3 ++-
+ net/ceph/messenger.c | 7 ++++++-
+ net/ceph/mon_client.c | 8 ++++----
+ net/ceph/osd_client.c | 4 ++--
+ 4 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -231,7 +231,8 @@ extern void ceph_con_open(struct ceph_co
+ extern bool ceph_con_opened(struct ceph_connection *con);
+ extern void ceph_con_close(struct ceph_connection *con);
+ extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
+-extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
++
++extern void ceph_msg_revoke(struct ceph_msg *msg);
+ extern void ceph_con_revoke_message(struct ceph_connection *con,
+ struct ceph_msg *msg);
+ extern void ceph_con_keepalive(struct ceph_connection *con);
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2425,8 +2425,13 @@ EXPORT_SYMBOL(ceph_con_send);
+ /*
+ * Revoke a message that was previously queued for send
+ */
+-void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
++void ceph_msg_revoke(struct ceph_msg *msg)
+ {
++ struct ceph_connection *con = msg->con;
++
++ if (!con)
++ return; /* Message not in our possession */
++
+ mutex_lock(&con->mutex);
+ if (!list_empty(&msg->list_head)) {
+ dout("%s %p msg %p - was on queue\n", __func__, con, msg);
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -106,7 +106,7 @@ static void __send_prepared_auth_request
+ monc->pending_auth = 1;
+ monc->m_auth->front.iov_len = len;
+ monc->m_auth->hdr.front_len = cpu_to_le32(len);
+- ceph_con_revoke(&monc->con, monc->m_auth);
++ ceph_msg_revoke(monc->m_auth);
+ ceph_msg_get(monc->m_auth); /* keep our ref */
+ ceph_con_send(&monc->con, monc->m_auth);
+ }
+@@ -117,7 +117,7 @@ static void __send_prepared_auth_request
+ static void __close_session(struct ceph_mon_client *monc)
+ {
+ dout("__close_session closing mon%d\n", monc->cur_mon);
+- ceph_con_revoke(&monc->con, monc->m_auth);
++ ceph_msg_revoke(monc->m_auth);
+ ceph_con_close(&monc->con);
+ monc->con.private = NULL;
+ monc->cur_mon = -1;
+@@ -229,7 +229,7 @@ static void __send_subscribe(struct ceph
+
+ msg->front.iov_len = p - msg->front.iov_base;
+ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+- ceph_con_revoke(&monc->con, msg);
++ ceph_msg_revoke(msg);
+ ceph_con_send(&monc->con, ceph_msg_get(msg));
+
+ monc->sub_sent = jiffies | 1; /* never 0 */
+@@ -688,7 +688,7 @@ static void __resend_generic_request(str
+
+ for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
+ req = rb_entry(p, struct ceph_mon_generic_request, node);
+- ceph_con_revoke(&monc->con, req->request);
++ ceph_msg_revoke(req->request);
+ ceph_con_send(&monc->con, ceph_msg_get(req->request));
+ }
+ }
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -852,7 +852,7 @@ static void __unregister_request(struct
+
+ if (req->r_osd) {
+ /* make sure the original request isn't in flight. */
+- ceph_con_revoke(&req->r_osd->o_con, req->r_request);
++ ceph_msg_revoke(req->r_request);
+
+ list_del_init(&req->r_osd_item);
+ if (list_empty(&req->r_osd->o_requests) &&
+@@ -879,7 +879,7 @@ static void __unregister_request(struct
+ static void __cancel_request(struct ceph_osd_request *req)
+ {
+ if (req->r_sent && req->r_osd) {
+- ceph_con_revoke(&req->r_osd->o_con, req->r_request);
++ ceph_msg_revoke(req->r_request);
+ req->r_sent = 0;
+ }
+ }
--- /dev/null
+From ede3074a05d4669f799c8887291c940e8f86849b Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Fri, 1 Jun 2012 14:56:43 -0500
+Subject: libceph: make ceph_con_revoke_message() a msg op
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 8921d114f5574c6da2cdd00749d185633ecf88f3)
+
+ceph_con_revoke_message() is passed both a message and a ceph
+connection. A ceph_msg allocated for incoming messages on a
+connection always has a pointer to that connection, so there's no
+need to provide the connection when revoking such a message.
+
+Note that the existing logic does not preclude the message supplied
+being a null/bogus message pointer. The only user of this interface
+is the OSD client, and the only value an osd client passes is a
+request's r_reply field. That is always non-null (except briefly in
+an error path in ceph_osdc_alloc_request(), and that drops the
+only reference so the request won't ever have a reply to revoke).
+So we can safely assume the passed-in message is non-null, but add a
+BUG_ON() to make it very obvious we are imposing this restriction.
+
+Rename the function ceph_msg_revoke_incoming() to reflect that it is
+really an operation on an incoming message.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h | 4 ++--
+ net/ceph/messenger.c | 22 ++++++++++++++++------
+ net/ceph/osd_client.c | 9 ++++-----
+ 3 files changed, 22 insertions(+), 13 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -233,8 +233,8 @@ extern void ceph_con_close(struct ceph_c
+ extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
+
+ extern void ceph_msg_revoke(struct ceph_msg *msg);
+-extern void ceph_con_revoke_message(struct ceph_connection *con,
+- struct ceph_msg *msg);
++extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
++
+ extern void ceph_con_keepalive(struct ceph_connection *con);
+ extern struct ceph_connection *ceph_con_get(struct ceph_connection *con);
+ extern void ceph_con_put(struct ceph_connection *con);
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2460,17 +2460,27 @@ void ceph_msg_revoke(struct ceph_msg *ms
+ /*
+ * Revoke a message that we may be reading data into
+ */
+-void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
++void ceph_msg_revoke_incoming(struct ceph_msg *msg)
+ {
++ struct ceph_connection *con;
++
++ BUG_ON(msg == NULL);
++ if (!msg->con) {
++ dout("%s msg %p null con\n", __func__, msg);
++
++ return; /* Message not in our possession */
++ }
++
++ con = msg->con;
+ mutex_lock(&con->mutex);
+- if (con->in_msg && con->in_msg == msg) {
++ if (con->in_msg == msg) {
+ unsigned front_len = le32_to_cpu(con->in_hdr.front_len);
+ unsigned middle_len = le32_to_cpu(con->in_hdr.middle_len);
+ unsigned data_len = le32_to_cpu(con->in_hdr.data_len);
+
+ /* skip rest of message */
+- dout("con_revoke_pages %p msg %p revoked\n", con, msg);
+- con->in_base_pos = con->in_base_pos -
++ dout("%s %p msg %p revoked\n", __func__, con, msg);
++ con->in_base_pos = con->in_base_pos -
+ sizeof(struct ceph_msg_header) -
+ front_len -
+ middle_len -
+@@ -2481,8 +2491,8 @@ void ceph_con_revoke_message(struct ceph
+ con->in_tag = CEPH_MSGR_TAG_READY;
+ con->in_seq++;
+ } else {
+- dout("con_revoke_pages %p msg %p pages %p no-op\n",
+- con, con->in_msg, msg);
++ dout("%s %p in_msg %p msg %p no-op\n",
++ __func__, con, con->in_msg, msg);
+ }
+ mutex_unlock(&con->mutex);
+ }
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -140,10 +140,9 @@ void ceph_osdc_release_request(struct kr
+ if (req->r_request)
+ ceph_msg_put(req->r_request);
+ if (req->r_con_filling_msg) {
+- dout("release_request revoking pages %p from con %p\n",
++ dout("%s revoking pages %p from con %p\n", __func__,
+ req->r_pages, req->r_con_filling_msg);
+- ceph_con_revoke_message(req->r_con_filling_msg,
+- req->r_reply);
++ ceph_msg_revoke_incoming(req->r_reply);
+ req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
+ }
+ if (req->r_reply)
+@@ -2022,9 +2021,9 @@ static struct ceph_msg *get_reply(struct
+ }
+
+ if (req->r_con_filling_msg) {
+- dout("get_reply revoking msg %p from old con %p\n",
++ dout("%s revoking msg %p from old con %p\n", __func__,
+ req->r_reply, req->r_con_filling_msg);
+- ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply);
++ ceph_msg_revoke_incoming(req->r_reply);
+ req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
+ req->r_con_filling_msg = NULL;
+ }
--- /dev/null
+From d87d591772b2956b9ac9e25eb499366100d2c4a8 Mon Sep 17 00:00:00 2001
+From: Xi Wang <xi.wang@gmail.com>
+Date: Wed, 6 Jun 2012 19:35:55 -0500
+Subject: libceph: fix overflow in __decode_pool_names()
+
+From: Xi Wang <xi.wang@gmail.com>
+
+(cherry picked from commit ad3b904c07dfa88603689bf9a67bffbb9b99beb5)
+
+`len' is read from network and thus needs validation. Otherwise a
+large `len' would cause out-of-bounds access via the memcpy() call.
+In addition, len = 0xffffffff would overflow the kmalloc() size,
+leading to out-of-bounds write.
+
+This patch adds a check of `len' via ceph_decode_need(). Also use
+kstrndup rather than kmalloc/memcpy.
+
+[elder@inktank.com: added -ENOMEM return for null kstrndup() result]
+
+Signed-off-by: Xi Wang <xi.wang@gmail.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/osdmap.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/net/ceph/osdmap.c
++++ b/net/ceph/osdmap.c
+@@ -495,15 +495,16 @@ static int __decode_pool_names(void **p,
+ ceph_decode_32_safe(p, end, pool, bad);
+ ceph_decode_32_safe(p, end, len, bad);
+ dout(" pool %d len %d\n", pool, len);
++ ceph_decode_need(p, end, len, bad);
+ pi = __lookup_pg_pool(&map->pg_pools, pool);
+ if (pi) {
++ char *name = kstrndup(*p, len, GFP_NOFS);
++
++ if (!name)
++ return -ENOMEM;
+ kfree(pi->name);
+- pi->name = kmalloc(len + 1, GFP_NOFS);
+- if (pi->name) {
+- memcpy(pi->name, *p, len);
+- pi->name[len] = '\0';
+- dout(" name is %s\n", pi->name);
+- }
++ pi->name = name;
++ dout(" name is %s\n", pi->name);
+ }
+ *p += len;
+ }
--- /dev/null
+From cc725c099f905095dfa2fe50c46575096ff0052d Mon Sep 17 00:00:00 2001
+From: Xi Wang <xi.wang@gmail.com>
+Date: Wed, 6 Jun 2012 19:35:55 -0500
+Subject: libceph: fix overflow in osdmap_decode()
+
+From: Xi Wang <xi.wang@gmail.com>
+
+(cherry picked from commit e91a9b639a691e0982088b5954eaafb5a25c8f1c)
+
+On 32-bit systems, a large `n' would overflow `n * sizeof(u32)' and bypass
+the check ceph_decode_need(p, end, n * sizeof(u32), bad). It would also
+overflow the subsequent kmalloc() size, leading to out-of-bounds write.
+
+Signed-off-by: Xi Wang <xi.wang@gmail.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/osdmap.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ceph/osdmap.c
++++ b/net/ceph/osdmap.c
+@@ -674,6 +674,9 @@ struct ceph_osdmap *osdmap_decode(void *
+ ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
+ ceph_decode_copy(p, &pgid, sizeof(pgid));
+ n = ceph_decode_32(p);
++ err = -EINVAL;
++ if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
++ goto bad;
+ ceph_decode_need(p, end, n * sizeof(u32), bad);
+ err = -ENOMEM;
+ pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
--- /dev/null
+From 1edb6147d0adbb0d51bed7c7432f59bbc41f8c82 Mon Sep 17 00:00:00 2001
+From: Xi Wang <xi.wang@gmail.com>
+Date: Wed, 6 Jun 2012 19:35:55 -0500
+Subject: libceph: fix overflow in osdmap_apply_incremental()
+
+From: Xi Wang <xi.wang@gmail.com>
+
+(cherry picked from commit a5506049500b30dbc5edb4d07a3577477c1f3643)
+
+On 32-bit systems, a large `pglen' would overflow `pglen*sizeof(u32)'
+and bypass the check ceph_decode_need(p, end, pglen*sizeof(u32), bad).
+It would also overflow the subsequent kmalloc() size, leading to
+out-of-bounds write.
+
+Signed-off-by: Xi Wang <xi.wang@gmail.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/osdmap.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ceph/osdmap.c
++++ b/net/ceph/osdmap.c
+@@ -900,6 +900,10 @@ struct ceph_osdmap *osdmap_apply_increme
+ (void) __remove_pg_mapping(&map->pg_temp, pgid);
+
+ /* insert */
++ if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) {
++ err = -EINVAL;
++ goto bad;
++ }
+ pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
+ if (!pg) {
+ err = -ENOMEM;
--- /dev/null
+From aa868bb25ad02c63c69783a9ffa1f6f1d3e98a5d Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Sat, 9 Jun 2012 14:19:21 -0700
+Subject: libceph: transition socket state prior to actual connect
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 89a86be0ce20022f6ede8bccec078dbb3d63caaa)
+
+Once we call ->connect(), we are racing against the actual
+connection, and a subsequent transition from CONNECTING ->
+CONNECTED. Set the state to CONNECTING before that, under the
+protection of the mutex, to avoid the race.
+
+This was introduced in 928443cd9644e7cfd46f687dbeffda2d1a357ff9,
+with the original socket state code.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -321,6 +321,7 @@ static int ceph_tcp_connect(struct ceph_
+
+ dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
+
++ con_sock_state_connecting(con);
+ ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+ O_NONBLOCK);
+ if (ret == -EINPROGRESS) {
+@@ -336,8 +337,6 @@ static int ceph_tcp_connect(struct ceph_
+ return ret;
+ }
+ con->sock = sock;
+- con_sock_state_connecting(con);
+-
+ return 0;
+ }
+
0037-ceph-check-PG_Private-flag-before-accessing-page-pri.patch
0038-libceph-eliminate-connection-state-DEAD.patch
0039-libceph-kill-bad_proto-ceph-connection-op.patch
+0040-libceph-rename-socket-callbacks.patch
+0041-libceph-rename-kvec_reset-and-kvec_add-functions.patch
+0042-libceph-embed-ceph-messenger-structure-in-ceph_clien.patch
+0043-libceph-start-separating-connection-flags-from-state.patch
+0044-libceph-start-tracking-connection-socket-state.patch
+0045-libceph-provide-osd-number-when-creating-osd.patch
+0046-libceph-set-CLOSED-state-bit-in-con_init.patch
+0047-libceph-embed-ceph-connection-structure-in-mon_clien.patch
+0048-libceph-drop-connection-refcounting-for-mon_client.patch
+0049-libceph-init-monitor-connection-when-opening.patch
+0050-libceph-fully-initialize-connection-in-con_init.patch
+0051-libceph-tweak-ceph_alloc_msg.patch
+0052-libceph-have-messages-point-to-their-connection.patch
+0053-libceph-have-messages-take-a-connection-reference.patch
+0054-libceph-make-ceph_con_revoke-a-msg-operation.patch
+0055-libceph-make-ceph_con_revoke_message-a-msg-op.patch
+0056-libceph-fix-overflow-in-__decode_pool_names.patch
+0057-libceph-fix-overflow-in-osdmap_decode.patch
+0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch
+0059-libceph-transition-socket-state-prior-to-actual-conn.patch