git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 21 Nov 2012 20:04:45 +0000 (12:04 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 21 Nov 2012 20:04:45 +0000 (12:04 -0800)
added patches:
0040-libceph-rename-socket-callbacks.patch
0041-libceph-rename-kvec_reset-and-kvec_add-functions.patch
0042-libceph-embed-ceph-messenger-structure-in-ceph_clien.patch
0043-libceph-start-separating-connection-flags-from-state.patch
0044-libceph-start-tracking-connection-socket-state.patch
0045-libceph-provide-osd-number-when-creating-osd.patch
0046-libceph-set-CLOSED-state-bit-in-con_init.patch
0047-libceph-embed-ceph-connection-structure-in-mon_clien.patch
0048-libceph-drop-connection-refcounting-for-mon_client.patch
0049-libceph-init-monitor-connection-when-opening.patch
0050-libceph-fully-initialize-connection-in-con_init.patch
0051-libceph-tweak-ceph_alloc_msg.patch
0052-libceph-have-messages-point-to-their-connection.patch
0053-libceph-have-messages-take-a-connection-reference.patch
0054-libceph-make-ceph_con_revoke-a-msg-operation.patch
0055-libceph-make-ceph_con_revoke_message-a-msg-op.patch
0056-libceph-fix-overflow-in-__decode_pool_names.patch
0057-libceph-fix-overflow-in-osdmap_decode.patch
0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch
0059-libceph-transition-socket-state-prior-to-actual-conn.patch

21 files changed:
queue-3.4/0040-libceph-rename-socket-callbacks.patch [new file with mode: 0644]
queue-3.4/0041-libceph-rename-kvec_reset-and-kvec_add-functions.patch [new file with mode: 0644]
queue-3.4/0042-libceph-embed-ceph-messenger-structure-in-ceph_clien.patch [new file with mode: 0644]
queue-3.4/0043-libceph-start-separating-connection-flags-from-state.patch [new file with mode: 0644]
queue-3.4/0044-libceph-start-tracking-connection-socket-state.patch [new file with mode: 0644]
queue-3.4/0045-libceph-provide-osd-number-when-creating-osd.patch [new file with mode: 0644]
queue-3.4/0046-libceph-set-CLOSED-state-bit-in-con_init.patch [new file with mode: 0644]
queue-3.4/0047-libceph-embed-ceph-connection-structure-in-mon_clien.patch [new file with mode: 0644]
queue-3.4/0048-libceph-drop-connection-refcounting-for-mon_client.patch [new file with mode: 0644]
queue-3.4/0049-libceph-init-monitor-connection-when-opening.patch [new file with mode: 0644]
queue-3.4/0050-libceph-fully-initialize-connection-in-con_init.patch [new file with mode: 0644]
queue-3.4/0051-libceph-tweak-ceph_alloc_msg.patch [new file with mode: 0644]
queue-3.4/0052-libceph-have-messages-point-to-their-connection.patch [new file with mode: 0644]
queue-3.4/0053-libceph-have-messages-take-a-connection-reference.patch [new file with mode: 0644]
queue-3.4/0054-libceph-make-ceph_con_revoke-a-msg-operation.patch [new file with mode: 0644]
queue-3.4/0055-libceph-make-ceph_con_revoke_message-a-msg-op.patch [new file with mode: 0644]
queue-3.4/0056-libceph-fix-overflow-in-__decode_pool_names.patch [new file with mode: 0644]
queue-3.4/0057-libceph-fix-overflow-in-osdmap_decode.patch [new file with mode: 0644]
queue-3.4/0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch [new file with mode: 0644]
queue-3.4/0059-libceph-transition-socket-state-prior-to-actual-conn.patch [new file with mode: 0644]
queue-3.4/series

diff --git a/queue-3.4/0040-libceph-rename-socket-callbacks.patch b/queue-3.4/0040-libceph-rename-socket-callbacks.patch
new file mode 100644 (file)
index 0000000..56b904e
--- /dev/null
@@ -0,0 +1,112 @@
+From 5566f701581eecb9bb825d4db233256106ae9bd6 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Tue, 22 May 2012 11:41:43 -0500
+Subject: libceph: rename socket callbacks
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 327800bdc2cb9b71f4b458ca07aa9d522668dde0)
+
+Change the names of the three socket callback functions to make it
+more obvious they're specifically associated with a connection's
+socket (not the ceph connection that uses it).
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   28 ++++++++++++++--------------
+ 1 file changed, 14 insertions(+), 14 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -153,46 +153,46 @@ EXPORT_SYMBOL(ceph_msgr_flush);
+  */
+ /* data available on socket, or listen socket received a connect */
+-static void ceph_data_ready(struct sock *sk, int count_unused)
++static void ceph_sock_data_ready(struct sock *sk, int count_unused)
+ {
+       struct ceph_connection *con = sk->sk_user_data;
+       if (sk->sk_state != TCP_CLOSE_WAIT) {
+-              dout("ceph_data_ready on %p state = %lu, queueing work\n",
++              dout("%s on %p state = %lu, queueing work\n", __func__,
+                    con, con->state);
+               queue_con(con);
+       }
+ }
+ /* socket has buffer space for writing */
+-static void ceph_write_space(struct sock *sk)
++static void ceph_sock_write_space(struct sock *sk)
+ {
+       struct ceph_connection *con = sk->sk_user_data;
+       /* only queue to workqueue if there is data we want to write,
+        * and there is sufficient space in the socket buffer to accept
+-       * more data.  clear SOCK_NOSPACE so that ceph_write_space()
++       * more data.  clear SOCK_NOSPACE so that ceph_sock_write_space()
+        * doesn't get called again until try_write() fills the socket
+        * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
+        * and net/core/stream.c:sk_stream_write_space().
+        */
+       if (test_bit(WRITE_PENDING, &con->state)) {
+               if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+-                      dout("ceph_write_space %p queueing write work\n", con);
++                      dout("%s %p queueing write work\n", __func__, con);
+                       clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+                       queue_con(con);
+               }
+       } else {
+-              dout("ceph_write_space %p nothing to write\n", con);
++              dout("%s %p nothing to write\n", __func__, con);
+       }
+ }
+ /* socket's state has changed */
+-static void ceph_state_change(struct sock *sk)
++static void ceph_sock_state_change(struct sock *sk)
+ {
+       struct ceph_connection *con = sk->sk_user_data;
+-      dout("ceph_state_change %p state = %lu sk_state = %u\n",
++      dout("%s %p state = %lu sk_state = %u\n", __func__,
+            con, con->state, sk->sk_state);
+       if (test_bit(CLOSED, &con->state))
+@@ -200,9 +200,9 @@ static void ceph_state_change(struct soc
+       switch (sk->sk_state) {
+       case TCP_CLOSE:
+-              dout("ceph_state_change TCP_CLOSE\n");
++              dout("%s TCP_CLOSE\n", __func__);
+       case TCP_CLOSE_WAIT:
+-              dout("ceph_state_change TCP_CLOSE_WAIT\n");
++              dout("%s TCP_CLOSE_WAIT\n", __func__);
+               if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) {
+                       if (test_bit(CONNECTING, &con->state))
+                               con->error_msg = "connection failed";
+@@ -212,7 +212,7 @@ static void ceph_state_change(struct soc
+               }
+               break;
+       case TCP_ESTABLISHED:
+-              dout("ceph_state_change TCP_ESTABLISHED\n");
++              dout("%s TCP_ESTABLISHED\n", __func__);
+               queue_con(con);
+               break;
+       default:        /* Everything else is uninteresting */
+@@ -228,9 +228,9 @@ static void set_sock_callbacks(struct so
+ {
+       struct sock *sk = sock->sk;
+       sk->sk_user_data = con;
+-      sk->sk_data_ready = ceph_data_ready;
+-      sk->sk_write_space = ceph_write_space;
+-      sk->sk_state_change = ceph_state_change;
++      sk->sk_data_ready = ceph_sock_data_ready;
++      sk->sk_write_space = ceph_sock_write_space;
++      sk->sk_state_change = ceph_sock_state_change;
+ }
diff --git a/queue-3.4/0041-libceph-rename-kvec_reset-and-kvec_add-functions.patch b/queue-3.4/0041-libceph-rename-kvec_reset-and-kvec_add-functions.patch
new file mode 100644 (file)
index 0000000..f7c3624
--- /dev/null
@@ -0,0 +1,183 @@
+From 2a06b676ea95fd6a85292a8b93f205867f4cbfef Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 23 May 2012 14:35:23 -0500
+Subject: libceph: rename kvec_reset and kvec_add functions
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit e22004235a900213625acd6583ac913d5a30c155)
+
+The functions ceph_con_out_kvec_reset() and ceph_con_out_kvec_add()
+are entirely private functions, so drop the "ceph_" prefix in their
+name to make them slightly more wieldy.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   48 ++++++++++++++++++++++++------------------------
+ 1 file changed, 24 insertions(+), 24 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -486,14 +486,14 @@ static u32 get_global_seq(struct ceph_me
+       return ret;
+ }
+-static void ceph_con_out_kvec_reset(struct ceph_connection *con)
++static void con_out_kvec_reset(struct ceph_connection *con)
+ {
+       con->out_kvec_left = 0;
+       con->out_kvec_bytes = 0;
+       con->out_kvec_cur = &con->out_kvec[0];
+ }
+-static void ceph_con_out_kvec_add(struct ceph_connection *con,
++static void con_out_kvec_add(struct ceph_connection *con,
+                               size_t size, void *data)
+ {
+       int index;
+@@ -534,7 +534,7 @@ static void prepare_write_message(struct
+       struct ceph_msg *m;
+       u32 crc;
+-      ceph_con_out_kvec_reset(con);
++      con_out_kvec_reset(con);
+       con->out_kvec_is_msg = true;
+       con->out_msg_done = false;
+@@ -542,9 +542,9 @@ static void prepare_write_message(struct
+        * TCP packet that's a good thing. */
+       if (con->in_seq > con->in_seq_acked) {
+               con->in_seq_acked = con->in_seq;
+-              ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
++              con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+               con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+-              ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
++              con_out_kvec_add(con, sizeof (con->out_temp_ack),
+                       &con->out_temp_ack);
+       }
+@@ -576,12 +576,12 @@ static void prepare_write_message(struct
+       BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
+       /* tag + hdr + front + middle */
+-      ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
+-      ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+-      ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
++      con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
++      con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
++      con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
+       if (m->middle)
+-              ceph_con_out_kvec_add(con, m->middle->vec.iov_len,
++              con_out_kvec_add(con, m->middle->vec.iov_len,
+                       m->middle->vec.iov_base);
+       /* fill in crc (except data pages), footer */
+@@ -630,12 +630,12 @@ static void prepare_write_ack(struct cep
+            con->in_seq_acked, con->in_seq);
+       con->in_seq_acked = con->in_seq;
+-      ceph_con_out_kvec_reset(con);
++      con_out_kvec_reset(con);
+-      ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
++      con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+       con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+-      ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
++      con_out_kvec_add(con, sizeof (con->out_temp_ack),
+                               &con->out_temp_ack);
+       con->out_more = 1;  /* more will follow.. eventually.. */
+@@ -648,8 +648,8 @@ static void prepare_write_ack(struct cep
+ static void prepare_write_keepalive(struct ceph_connection *con)
+ {
+       dout("prepare_write_keepalive %p\n", con);
+-      ceph_con_out_kvec_reset(con);
+-      ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
++      con_out_kvec_reset(con);
++      con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
+       set_bit(WRITE_PENDING, &con->state);
+ }
+@@ -694,8 +694,8 @@ static struct ceph_auth_handshake *get_c
+  */
+ static void prepare_write_banner(struct ceph_connection *con)
+ {
+-      ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
+-      ceph_con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
++      con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
++      con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
+                                       &con->msgr->my_enc_addr);
+       con->out_more = 0;
+@@ -742,10 +742,10 @@ static int prepare_write_connect(struct
+       con->out_connect.authorizer_len = auth ?
+               cpu_to_le32(auth->authorizer_buf_len) : 0;
+-      ceph_con_out_kvec_add(con, sizeof (con->out_connect),
++      con_out_kvec_add(con, sizeof (con->out_connect),
+                                       &con->out_connect);
+       if (auth && auth->authorizer_buf_len)
+-              ceph_con_out_kvec_add(con, auth->authorizer_buf_len,
++              con_out_kvec_add(con, auth->authorizer_buf_len,
+                                       auth->authorizer_buf);
+       con->out_more = 0;
+@@ -939,7 +939,7 @@ static int write_partial_msg_pages(struc
+       /* prepare and queue up footer, too */
+       if (!do_datacrc)
+               con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
+-      ceph_con_out_kvec_reset(con);
++      con_out_kvec_reset(con);
+       prepare_write_message_footer(con);
+       ret = 1;
+ out:
+@@ -1402,7 +1402,7 @@ static int process_connect(struct ceph_c
+                       return -1;
+               }
+               con->auth_retry = 1;
+-              ceph_con_out_kvec_reset(con);
++              con_out_kvec_reset(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       return ret;
+@@ -1423,7 +1423,7 @@ static int process_connect(struct ceph_c
+                      ENTITY_NAME(con->peer_name),
+                      ceph_pr_addr(&con->peer_addr.in_addr));
+               reset_connection(con);
+-              ceph_con_out_kvec_reset(con);
++              con_out_kvec_reset(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       return ret;
+@@ -1449,7 +1449,7 @@ static int process_connect(struct ceph_c
+                    le32_to_cpu(con->out_connect.connect_seq),
+                    le32_to_cpu(con->in_reply.connect_seq));
+               con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
+-              ceph_con_out_kvec_reset(con);
++              con_out_kvec_reset(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       return ret;
+@@ -1466,7 +1466,7 @@ static int process_connect(struct ceph_c
+                    le32_to_cpu(con->in_reply.global_seq));
+               get_global_seq(con->msgr,
+                              le32_to_cpu(con->in_reply.global_seq));
+-              ceph_con_out_kvec_reset(con);
++              con_out_kvec_reset(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       return ret;
+@@ -1873,7 +1873,7 @@ more:
+       /* open the socket first? */
+       if (con->sock == NULL) {
+-              ceph_con_out_kvec_reset(con);
++              con_out_kvec_reset(con);
+               prepare_write_banner(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
diff --git a/queue-3.4/0042-libceph-embed-ceph-messenger-structure-in-ceph_clien.patch b/queue-3.4/0042-libceph-embed-ceph-messenger-structure-in-ceph_clien.patch
new file mode 100644 (file)
index 0000000..0317e97
--- /dev/null
@@ -0,0 +1,208 @@
+From ab20b55d471452332ce9e1b76ea7a522999e2055 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Sat, 26 May 2012 23:26:43 -0500
+Subject: libceph: embed ceph messenger structure in ceph_client
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 15d9882c336db2db73ccf9871ae2398e452f694c)
+
+A ceph client has a pointer to a ceph messenger structure in it.
+There is always exactly one ceph messenger for a ceph client, so
+there is no need to allocate it separate from the ceph client
+structure.
+
+Switch the ceph_client structure to embed its ceph_messenger
+structure.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/mds_client.c           |    2 +-
+ include/linux/ceph/libceph.h   |    2 +-
+ include/linux/ceph/messenger.h |    9 +++++----
+ net/ceph/ceph_common.c         |   18 +++++-------------
+ net/ceph/messenger.c           |   30 +++++++++---------------------
+ net/ceph/mon_client.c          |    6 +++---
+ net/ceph/osd_client.c          |    4 ++--
+ 7 files changed, 26 insertions(+), 45 deletions(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -394,7 +394,7 @@ static struct ceph_mds_session *register
+       s->s_seq = 0;
+       mutex_init(&s->s_mutex);
+-      ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
++      ceph_con_init(&mdsc->fsc->client->msgr, &s->s_con);
+       s->s_con.private = s;
+       s->s_con.ops = &mds_con_ops;
+       s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
+--- a/include/linux/ceph/libceph.h
++++ b/include/linux/ceph/libceph.h
+@@ -132,7 +132,7 @@ struct ceph_client {
+       u32 supported_features;
+       u32 required_features;
+-      struct ceph_messenger *msgr;   /* messenger instance */
++      struct ceph_messenger msgr;   /* messenger instance */
+       struct ceph_mon_client monc;
+       struct ceph_osd_client osdc;
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -203,10 +203,11 @@ extern int ceph_msgr_init(void);
+ extern void ceph_msgr_exit(void);
+ extern void ceph_msgr_flush(void);
+-extern struct ceph_messenger *ceph_messenger_create(
+-      struct ceph_entity_addr *myaddr,
+-      u32 features, u32 required);
+-extern void ceph_messenger_destroy(struct ceph_messenger *);
++extern void ceph_messenger_init(struct ceph_messenger *msgr,
++                      struct ceph_entity_addr *myaddr,
++                      u32 supported_features,
++                      u32 required_features,
++                      bool nocrc);
+ extern void ceph_con_init(struct ceph_messenger *msgr,
+                         struct ceph_connection *con);
+--- a/net/ceph/ceph_common.c
++++ b/net/ceph/ceph_common.c
+@@ -468,19 +468,15 @@ struct ceph_client *ceph_create_client(s
+       /* msgr */
+       if (ceph_test_opt(client, MYIP))
+               myaddr = &client->options->my_addr;
+-      client->msgr = ceph_messenger_create(myaddr,
+-                                           client->supported_features,
+-                                           client->required_features);
+-      if (IS_ERR(client->msgr)) {
+-              err = PTR_ERR(client->msgr);
+-              goto fail;
+-      }
+-      client->msgr->nocrc = ceph_test_opt(client, NOCRC);
++      ceph_messenger_init(&client->msgr, myaddr,
++              client->supported_features,
++              client->required_features,
++              ceph_test_opt(client, NOCRC));
+       /* subsystems */
+       err = ceph_monc_init(&client->monc, client);
+       if (err < 0)
+-              goto fail_msgr;
++              goto fail;
+       err = ceph_osdc_init(&client->osdc, client);
+       if (err < 0)
+               goto fail_monc;
+@@ -489,8 +485,6 @@ struct ceph_client *ceph_create_client(s
+ fail_monc:
+       ceph_monc_stop(&client->monc);
+-fail_msgr:
+-      ceph_messenger_destroy(client->msgr);
+ fail:
+       kfree(client);
+       return ERR_PTR(err);
+@@ -508,8 +502,6 @@ void ceph_destroy_client(struct ceph_cli
+       ceph_debugfs_client_cleanup(client);
+-      ceph_messenger_destroy(client->msgr);
+-
+       ceph_destroy_options(client->options);
+       kfree(client);
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2249,18 +2249,14 @@ out:
+ /*
+- * create a new messenger instance
++ * initialize a new messenger instance
+  */
+-struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
+-                                           u32 supported_features,
+-                                           u32 required_features)
++void ceph_messenger_init(struct ceph_messenger *msgr,
++                      struct ceph_entity_addr *myaddr,
++                      u32 supported_features,
++                      u32 required_features,
++                      bool nocrc)
+ {
+-      struct ceph_messenger *msgr;
+-
+-      msgr = kzalloc(sizeof(*msgr), GFP_KERNEL);
+-      if (msgr == NULL)
+-              return ERR_PTR(-ENOMEM);
+-
+       msgr->supported_features = supported_features;
+       msgr->required_features = required_features;
+@@ -2273,19 +2269,11 @@ struct ceph_messenger *ceph_messenger_cr
+       msgr->inst.addr.type = 0;
+       get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
+       encode_my_addr(msgr);
++      msgr->nocrc = nocrc;
+-      dout("messenger_create %p\n", msgr);
+-      return msgr;
+-}
+-EXPORT_SYMBOL(ceph_messenger_create);
+-
+-void ceph_messenger_destroy(struct ceph_messenger *msgr)
+-{
+-      dout("destroy %p\n", msgr);
+-      kfree(msgr);
+-      dout("destroyed messenger %p\n", msgr);
++      dout("%s %p\n", __func__, msgr);
+ }
+-EXPORT_SYMBOL(ceph_messenger_destroy);
++EXPORT_SYMBOL(ceph_messenger_init);
+ static void clear_standby(struct ceph_connection *con)
+ {
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -763,7 +763,7 @@ int ceph_monc_init(struct ceph_mon_clien
+       monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
+       if (!monc->con)
+               goto out_monmap;
+-      ceph_con_init(monc->client->msgr, monc->con);
++      ceph_con_init(&monc->client->msgr, monc->con);
+       monc->con->private = monc;
+       monc->con->ops = &mon_con_ops;
+@@ -888,8 +888,8 @@ static void handle_auth_reply(struct cep
+       } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
+               dout("authenticated, starting session\n");
+-              monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
+-              monc->client->msgr->inst.name.num =
++              monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
++              monc->client->msgr.inst.name.num =
+                                       cpu_to_le64(monc->auth->global_id);
+               __send_subscribe(monc);
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -639,7 +639,7 @@ static struct ceph_osd *create_osd(struc
+       INIT_LIST_HEAD(&osd->o_osd_lru);
+       osd->o_incarnation = 1;
+-      ceph_con_init(osdc->client->msgr, &osd->o_con);
++      ceph_con_init(&osdc->client->msgr, &osd->o_con);
+       osd->o_con.private = osd;
+       osd->o_con.ops = &osd_con_ops;
+       osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
+@@ -1391,7 +1391,7 @@ void ceph_osdc_handle_map(struct ceph_os
+                            epoch, maplen);
+                       newmap = osdmap_apply_incremental(&p, next,
+                                                         osdc->osdmap,
+-                                                        osdc->client->msgr);
++                                                        &osdc->client->msgr);
+                       if (IS_ERR(newmap)) {
+                               err = PTR_ERR(newmap);
+                               goto bad;
diff --git a/queue-3.4/0043-libceph-start-separating-connection-flags-from-state.patch b/queue-3.4/0043-libceph-start-separating-connection-flags-from-state.patch
new file mode 100644 (file)
index 0000000..d32529e
--- /dev/null
@@ -0,0 +1,257 @@
+From f47510a7851c8440845a0c1a7184323cc10ec15d Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Tue, 22 May 2012 11:41:43 -0500
+Subject: libceph: start separating connection flags from state
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 928443cd9644e7cfd46f687dbeffda2d1a357ff9)
+
+A ceph_connection holds a mixture of connection state (as in "state
+machine" state) and connection flags in a single "state" field.  To
+make the distinction more clear, define a new "flags" field and use
+it rather than the "state" field to hold Boolean flag values.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h |   18 +++++++++-----
+ net/ceph/messenger.c           |   50 ++++++++++++++++++++---------------------
+ 2 files changed, 37 insertions(+), 31 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -103,20 +103,25 @@ struct ceph_msg_pos {
+ #define MAX_DELAY_INTERVAL    (5 * 60 * HZ)
+ /*
+- * ceph_connection state bit flags
++ * ceph_connection flag bits
+  */
++
+ #define LOSSYTX         0  /* we can close channel or drop messages on errors */
+-#define CONNECTING    1
+-#define NEGOTIATING   2
+ #define KEEPALIVE_PENDING      3
+ #define WRITE_PENDING 4  /* we have data ready to send */
++#define SOCK_CLOSED   11 /* socket state changed to closed */
++#define BACKOFF         15
++
++/*
++ * ceph_connection states
++ */
++#define CONNECTING    1
++#define NEGOTIATING   2
+ #define STANDBY               8  /* no outgoing messages, socket closed.  we keep
+                           * the ceph_connection around to maintain shared
+                           * state with the peer. */
+ #define CLOSED                10 /* we've closed the connection */
+-#define SOCK_CLOSED   11 /* socket state changed to closed */
+ #define OPENING         13 /* open connection w/ (possibly new) peer */
+-#define BACKOFF         15
+ /*
+  * A single connection with another host.
+@@ -133,7 +138,8 @@ struct ceph_connection {
+       struct ceph_messenger *msgr;
+       struct socket *sock;
+-      unsigned long state;    /* connection state (see flags above) */
++      unsigned long flags;
++      unsigned long state;
+       const char *error_msg;  /* error message, if any */
+       struct ceph_entity_addr peer_addr; /* peer address */
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -176,7 +176,7 @@ static void ceph_sock_write_space(struct
+        * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
+        * and net/core/stream.c:sk_stream_write_space().
+        */
+-      if (test_bit(WRITE_PENDING, &con->state)) {
++      if (test_bit(WRITE_PENDING, &con->flags)) {
+               if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+                       dout("%s %p queueing write work\n", __func__, con);
+                       clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+@@ -203,7 +203,7 @@ static void ceph_sock_state_change(struc
+               dout("%s TCP_CLOSE\n", __func__);
+       case TCP_CLOSE_WAIT:
+               dout("%s TCP_CLOSE_WAIT\n", __func__);
+-              if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) {
++              if (test_and_set_bit(SOCK_CLOSED, &con->flags) == 0) {
+                       if (test_bit(CONNECTING, &con->state))
+                               con->error_msg = "connection failed";
+                       else
+@@ -395,9 +395,9 @@ void ceph_con_close(struct ceph_connecti
+            ceph_pr_addr(&con->peer_addr.in_addr));
+       set_bit(CLOSED, &con->state);  /* in case there's queued work */
+       clear_bit(STANDBY, &con->state);  /* avoid connect_seq bump */
+-      clear_bit(LOSSYTX, &con->state);  /* so we retry next connect */
+-      clear_bit(KEEPALIVE_PENDING, &con->state);
+-      clear_bit(WRITE_PENDING, &con->state);
++      clear_bit(LOSSYTX, &con->flags);  /* so we retry next connect */
++      clear_bit(KEEPALIVE_PENDING, &con->flags);
++      clear_bit(WRITE_PENDING, &con->flags);
+       mutex_lock(&con->mutex);
+       reset_connection(con);
+       con->peer_global_seq = 0;
+@@ -618,7 +618,7 @@ static void prepare_write_message(struct
+               prepare_write_message_footer(con);
+       }
+-      set_bit(WRITE_PENDING, &con->state);
++      set_bit(WRITE_PENDING, &con->flags);
+ }
+ /*
+@@ -639,7 +639,7 @@ static void prepare_write_ack(struct cep
+                               &con->out_temp_ack);
+       con->out_more = 1;  /* more will follow.. eventually.. */
+-      set_bit(WRITE_PENDING, &con->state);
++      set_bit(WRITE_PENDING, &con->flags);
+ }
+ /*
+@@ -650,7 +650,7 @@ static void prepare_write_keepalive(stru
+       dout("prepare_write_keepalive %p\n", con);
+       con_out_kvec_reset(con);
+       con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
+-      set_bit(WRITE_PENDING, &con->state);
++      set_bit(WRITE_PENDING, &con->flags);
+ }
+ /*
+@@ -679,7 +679,7 @@ static struct ceph_auth_handshake *get_c
+       if (IS_ERR(auth))
+               return auth;
+-      if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->state))
++      if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->flags))
+               return ERR_PTR(-EAGAIN);
+       con->auth_reply_buf = auth->authorizer_reply_buf;
+@@ -699,7 +699,7 @@ static void prepare_write_banner(struct
+                                       &con->msgr->my_enc_addr);
+       con->out_more = 0;
+-      set_bit(WRITE_PENDING, &con->state);
++      set_bit(WRITE_PENDING, &con->flags);
+ }
+ static int prepare_write_connect(struct ceph_connection *con)
+@@ -749,7 +749,7 @@ static int prepare_write_connect(struct
+                                       auth->authorizer_buf);
+       con->out_more = 0;
+-      set_bit(WRITE_PENDING, &con->state);
++      set_bit(WRITE_PENDING, &con->flags);
+       return 0;
+ }
+@@ -1496,7 +1496,7 @@ static int process_connect(struct ceph_c
+                       le32_to_cpu(con->in_reply.connect_seq));
+               if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
+-                      set_bit(LOSSYTX, &con->state);
++                      set_bit(LOSSYTX, &con->flags);
+               prepare_read_tag(con);
+               break;
+@@ -1937,14 +1937,14 @@ do_next:
+                       prepare_write_ack(con);
+                       goto more;
+               }
+-              if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) {
++              if (test_and_clear_bit(KEEPALIVE_PENDING, &con->flags)) {
+                       prepare_write_keepalive(con);
+                       goto more;
+               }
+       }
+       /* Nothing to do! */
+-      clear_bit(WRITE_PENDING, &con->state);
++      clear_bit(WRITE_PENDING, &con->flags);
+       dout("try_write nothing else to write.\n");
+       ret = 0;
+ out:
+@@ -2110,7 +2110,7 @@ static void con_work(struct work_struct
+       mutex_lock(&con->mutex);
+ restart:
+-      if (test_and_clear_bit(BACKOFF, &con->state)) {
++      if (test_and_clear_bit(BACKOFF, &con->flags)) {
+               dout("con_work %p backing off\n", con);
+               if (queue_delayed_work(ceph_msgr_wq, &con->work,
+                                      round_jiffies_relative(con->delay))) {
+@@ -2139,7 +2139,7 @@ restart:
+               con_close_socket(con);
+       }
+-      if (test_and_clear_bit(SOCK_CLOSED, &con->state))
++      if (test_and_clear_bit(SOCK_CLOSED, &con->flags))
+               goto fault;
+       ret = try_read(con);
+@@ -2178,7 +2178,7 @@ static void ceph_fault(struct ceph_conne
+       dout("fault %p state %lu to peer %s\n",
+            con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
+-      if (test_bit(LOSSYTX, &con->state)) {
++      if (test_bit(LOSSYTX, &con->flags)) {
+               dout("fault on LOSSYTX channel\n");
+               goto out;
+       }
+@@ -2200,9 +2200,9 @@ static void ceph_fault(struct ceph_conne
+       /* If there are no messages queued or keepalive pending, place
+        * the connection in a STANDBY state */
+       if (list_empty(&con->out_queue) &&
+-          !test_bit(KEEPALIVE_PENDING, &con->state)) {
++          !test_bit(KEEPALIVE_PENDING, &con->flags)) {
+               dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
+-              clear_bit(WRITE_PENDING, &con->state);
++              clear_bit(WRITE_PENDING, &con->flags);
+               set_bit(STANDBY, &con->state);
+       } else {
+               /* retry after a delay. */
+@@ -2226,7 +2226,7 @@ static void ceph_fault(struct ceph_conne
+                        * that when con_work restarts we schedule the
+                        * delay then.
+                        */
+-                      set_bit(BACKOFF, &con->state);
++                      set_bit(BACKOFF, &con->flags);
+               }
+       }
+@@ -2282,8 +2282,8 @@ static void clear_standby(struct ceph_co
+               mutex_lock(&con->mutex);
+               dout("clear_standby %p and ++connect_seq\n", con);
+               con->connect_seq++;
+-              WARN_ON(test_bit(WRITE_PENDING, &con->state));
+-              WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
++              WARN_ON(test_bit(WRITE_PENDING, &con->flags));
++              WARN_ON(test_bit(KEEPALIVE_PENDING, &con->flags));
+               mutex_unlock(&con->mutex);
+       }
+ }
+@@ -2321,7 +2321,7 @@ void ceph_con_send(struct ceph_connectio
+       /* if there wasn't anything waiting to send before, queue
+        * new work */
+       clear_standby(con);
+-      if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
++      if (test_and_set_bit(WRITE_PENDING, &con->flags) == 0)
+               queue_con(con);
+ }
+ EXPORT_SYMBOL(ceph_con_send);
+@@ -2388,8 +2388,8 @@ void ceph_con_keepalive(struct ceph_conn
+ {
+       dout("con_keepalive %p\n", con);
+       clear_standby(con);
+-      if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
+-          test_and_set_bit(WRITE_PENDING, &con->state) == 0)
++      if (test_and_set_bit(KEEPALIVE_PENDING, &con->flags) == 0 &&
++          test_and_set_bit(WRITE_PENDING, &con->flags) == 0)
+               queue_con(con);
+ }
+ EXPORT_SYMBOL(ceph_con_keepalive);
diff --git a/queue-3.4/0044-libceph-start-tracking-connection-socket-state.patch b/queue-3.4/0044-libceph-start-tracking-connection-socket-state.patch
new file mode 100644 (file)
index 0000000..ec54452
--- /dev/null
@@ -0,0 +1,199 @@
+From 1dc8c9af19ebb0486a3d573579358c4a3f918bb0 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Tue, 22 May 2012 22:15:49 -0500
+Subject: libceph: start tracking connection socket state
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit ce2c8903e76e690846a00a0284e4bd9ee954d680)
+
+Start explicitly keeping track of the state of a ceph connection's
+socket, separate from the state of the connection itself.  Create
+placeholder functions to encapsulate the state transitions.
+
+    --------
+    | NEW* |  transient initial state
+    --------
+        | con_sock_state_init()
+        v
+    ----------
+    | CLOSED |  initialized, but no socket (and no
+    ----------  TCP connection)
+     ^      \
+     |       \ con_sock_state_connecting()
+     |        ----------------------
+     |                              \
+     + con_sock_state_closed()       \
+     |\                               \
+     | \                               \
+     |  -----------                     \
+     |  | CLOSING |  socket event;       \
+     |  -----------  await close          \
+     |       ^                            |
+     |       |                            |
+     |       + con_sock_state_closing()   |
+     |      / \                           |
+     |     /   ---------------            |
+     |    /                   \           v
+     |   /                    --------------
+     |  /    -----------------| CONNECTING |  socket created, TCP
+     |  |   /                 --------------  connect initiated
+     |  |   | con_sock_state_connected()
+     |  |   v
+    -------------
+    | CONNECTED |  TCP connection established
+    -------------
+
+Make the socket state an atomic variable, reinforcing that it's a
+distinct transition with no possible "intermediate/both" states.
+This is almost certainly overkill at this point, though the
+transitions into CONNECTED and CLOSING state do get called via
+socket callback (the rest of the transitions occur with the
+connection mutex held).  We can back out the atomicity later.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h |    8 +++--
+ net/ceph/messenger.c           |   64 +++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 70 insertions(+), 2 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -137,14 +137,18 @@ struct ceph_connection {
+       const struct ceph_connection_operations *ops;
+       struct ceph_messenger *msgr;
++
++      atomic_t sock_state;
+       struct socket *sock;
++      struct ceph_entity_addr peer_addr; /* peer address */
++      struct ceph_entity_addr peer_addr_for_me;
++
+       unsigned long flags;
+       unsigned long state;
+       const char *error_msg;  /* error message, if any */
+-      struct ceph_entity_addr peer_addr; /* peer address */
+       struct ceph_entity_name peer_name; /* peer name */
+-      struct ceph_entity_addr peer_addr_for_me;
++
+       unsigned peer_features;
+       u32 connect_seq;      /* identify the most recent connection
+                                attempt for this connection, client */
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -29,6 +29,14 @@
+  * the sender.
+  */
++/* State values for ceph_connection->sock_state; NEW is assumed to be 0 */
++
++#define CON_SOCK_STATE_NEW            0       /* -> CLOSED */
++#define CON_SOCK_STATE_CLOSED         1       /* -> CONNECTING */
++#define CON_SOCK_STATE_CONNECTING     2       /* -> CONNECTED or -> CLOSING */
++#define CON_SOCK_STATE_CONNECTED      3       /* -> CLOSING or -> CLOSED */
++#define CON_SOCK_STATE_CLOSING                4       /* -> CLOSED */
++
+ /* static tag bytes (protocol control messages) */
+ static char tag_msg = CEPH_MSGR_TAG_MSG;
+ static char tag_ack = CEPH_MSGR_TAG_ACK;
+@@ -147,6 +155,55 @@ void ceph_msgr_flush(void)
+ }
+ EXPORT_SYMBOL(ceph_msgr_flush);
++/* Connection socket state transition functions */
++
++static void con_sock_state_init(struct ceph_connection *con)
++{
++      int old_state;
++
++      old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
++      if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
++              printk("%s: unexpected old state %d\n", __func__, old_state);
++}
++
++static void con_sock_state_connecting(struct ceph_connection *con)
++{
++      int old_state;
++
++      old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
++      if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
++              printk("%s: unexpected old state %d\n", __func__, old_state);
++}
++
++static void con_sock_state_connected(struct ceph_connection *con)
++{
++      int old_state;
++
++      old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
++      if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
++              printk("%s: unexpected old state %d\n", __func__, old_state);
++}
++
++static void con_sock_state_closing(struct ceph_connection *con)
++{
++      int old_state;
++
++      old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
++      if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
++                      old_state != CON_SOCK_STATE_CONNECTED &&
++                      old_state != CON_SOCK_STATE_CLOSING))
++              printk("%s: unexpected old state %d\n", __func__, old_state);
++}
++
++static void con_sock_state_closed(struct ceph_connection *con)
++{
++      int old_state;
++
++      old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
++      if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
++                      old_state != CON_SOCK_STATE_CLOSING))
++              printk("%s: unexpected old state %d\n", __func__, old_state);
++}
+ /*
+  * socket callback functions
+@@ -203,6 +260,7 @@ static void ceph_sock_state_change(struc
+               dout("%s TCP_CLOSE\n", __func__);
+       case TCP_CLOSE_WAIT:
+               dout("%s TCP_CLOSE_WAIT\n", __func__);
++              con_sock_state_closing(con);
+               if (test_and_set_bit(SOCK_CLOSED, &con->flags) == 0) {
+                       if (test_bit(CONNECTING, &con->state))
+                               con->error_msg = "connection failed";
+@@ -213,6 +271,7 @@ static void ceph_sock_state_change(struc
+               break;
+       case TCP_ESTABLISHED:
+               dout("%s TCP_ESTABLISHED\n", __func__);
++              con_sock_state_connected(con);
+               queue_con(con);
+               break;
+       default:        /* Everything else is uninteresting */
+@@ -277,6 +336,7 @@ static int ceph_tcp_connect(struct ceph_
+               return ret;
+       }
+       con->sock = sock;
++      con_sock_state_connecting(con);
+       return 0;
+ }
+@@ -343,6 +403,7 @@ static int con_close_socket(struct ceph_
+       sock_release(con->sock);
+       con->sock = NULL;
+       clear_bit(SOCK_CLOSED, &con->state);
++      con_sock_state_closed(con);
+       return rc;
+ }
+@@ -462,6 +523,9 @@ void ceph_con_init(struct ceph_messenger
+       memset(con, 0, sizeof(*con));
+       atomic_set(&con->nref, 1);
+       con->msgr = msgr;
++
++      con_sock_state_init(con);
++
+       mutex_init(&con->mutex);
+       INIT_LIST_HEAD(&con->out_queue);
+       INIT_LIST_HEAD(&con->out_sent);
diff --git a/queue-3.4/0045-libceph-provide-osd-number-when-creating-osd.patch b/queue-3.4/0045-libceph-provide-osd-number-when-creating-osd.patch
new file mode 100644 (file)
index 0000000..04b23d6
--- /dev/null
@@ -0,0 +1,63 @@
+From 3cfa1d37bc05179577c43f4ed6b2689556729813 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Sat, 26 May 2012 23:26:43 -0500
+Subject: libceph: provide osd number when creating osd
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit e10006f807ffc4d5b1d861305d18d9e8145891ca)
+
+Pass the osd number to the create_osd() routine, and move the
+initialization of fields that depend on it therein.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/osd_client.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -624,7 +624,7 @@ static void osd_reset(struct ceph_connec
+ /*
+  * Track open sessions with osds.
+  */
+-static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
++static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
+ {
+       struct ceph_osd *osd;
+@@ -634,6 +634,7 @@ static struct ceph_osd *create_osd(struc
+       atomic_set(&osd->o_ref, 1);
+       osd->o_osdc = osdc;
++      osd->o_osd = onum;
+       INIT_LIST_HEAD(&osd->o_requests);
+       INIT_LIST_HEAD(&osd->o_linger_requests);
+       INIT_LIST_HEAD(&osd->o_osd_lru);
+@@ -643,6 +644,7 @@ static struct ceph_osd *create_osd(struc
+       osd->o_con.private = osd;
+       osd->o_con.ops = &osd_con_ops;
+       osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
++      osd->o_con.peer_name.num = cpu_to_le64(onum);
+       INIT_LIST_HEAD(&osd->o_keepalive_item);
+       return osd;
+@@ -998,15 +1000,13 @@ static int __map_request(struct ceph_osd
+       req->r_osd = __lookup_osd(osdc, o);
+       if (!req->r_osd && o >= 0) {
+               err = -ENOMEM;
+-              req->r_osd = create_osd(osdc);
++              req->r_osd = create_osd(osdc, o);
+               if (!req->r_osd) {
+                       list_move(&req->r_req_lru_item, &osdc->req_notarget);
+                       goto out;
+               }
+               dout("map_request osd %p is osd%d\n", req->r_osd, o);
+-              req->r_osd->o_osd = o;
+-              req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
+               __insert_osd(osdc, req->r_osd);
+               ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]);
diff --git a/queue-3.4/0046-libceph-set-CLOSED-state-bit-in-con_init.patch b/queue-3.4/0046-libceph-set-CLOSED-state-bit-in-con_init.patch
new file mode 100644 (file)
index 0000000..8ec7651
--- /dev/null
@@ -0,0 +1,83 @@
+From 2a162524ff9c9635cf040179c1f587b08fe5efa7 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Tue, 29 May 2012 11:04:58 -0500
+Subject: libceph: set CLOSED state bit in con_init
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit a5988c490ef66cb04ea2f610681949b25c773b3c)
+
+Once a connection is fully initialized, it is really in a CLOSED
+state, so make that explicit by setting the bit in its state field.
+
+It is possible for a connection in NEGOTIATING state to get a
+failure, leading to ceph_fault() and ultimately ceph_con_close().
+Clear that bit if it is set in that case, to reflect that the
+connection truly is closed and is no longer participating in a
+connect sequence.
+
+Issue a warning if ceph_con_open() is called on a connection that
+is not in CLOSED state.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -454,11 +454,14 @@ void ceph_con_close(struct ceph_connecti
+ {
+       dout("con_close %p peer %s\n", con,
+            ceph_pr_addr(&con->peer_addr.in_addr));
+-      set_bit(CLOSED, &con->state);  /* in case there's queued work */
++      clear_bit(NEGOTIATING, &con->state);
+       clear_bit(STANDBY, &con->state);  /* avoid connect_seq bump */
++      set_bit(CLOSED, &con->state);
++
+       clear_bit(LOSSYTX, &con->flags);  /* so we retry next connect */
+       clear_bit(KEEPALIVE_PENDING, &con->flags);
+       clear_bit(WRITE_PENDING, &con->flags);
++
+       mutex_lock(&con->mutex);
+       reset_connection(con);
+       con->peer_global_seq = 0;
+@@ -475,7 +478,8 @@ void ceph_con_open(struct ceph_connectio
+ {
+       dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
+       set_bit(OPENING, &con->state);
+-      clear_bit(CLOSED, &con->state);
++      WARN_ON(!test_and_clear_bit(CLOSED, &con->state));
++
+       memcpy(&con->peer_addr, addr, sizeof(*addr));
+       con->delay = 0;      /* reset backoff memory */
+       queue_con(con);
+@@ -530,6 +534,8 @@ void ceph_con_init(struct ceph_messenger
+       INIT_LIST_HEAD(&con->out_queue);
+       INIT_LIST_HEAD(&con->out_sent);
+       INIT_DELAYED_WORK(&con->work, con_work);
++
++      set_bit(CLOSED, &con->state);
+ }
+ EXPORT_SYMBOL(ceph_con_init);
+@@ -1937,14 +1943,15 @@ more:
+       /* open the socket first? */
+       if (con->sock == NULL) {
++              clear_bit(NEGOTIATING, &con->state);
++              set_bit(CONNECTING, &con->state);
++
+               con_out_kvec_reset(con);
+               prepare_write_banner(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       goto out;
+               prepare_read_banner(con);
+-              set_bit(CONNECTING, &con->state);
+-              clear_bit(NEGOTIATING, &con->state);
+               BUG_ON(con->in_msg);
+               con->in_tag = CEPH_MSGR_TAG_READY;
diff --git a/queue-3.4/0047-libceph-embed-ceph-connection-structure-in-mon_clien.patch b/queue-3.4/0047-libceph-embed-ceph-connection-structure-in-mon_clien.patch
new file mode 100644 (file)
index 0000000..1629350
--- /dev/null
@@ -0,0 +1,178 @@
+From ac495165ac2a91db87b344c4e769e3eccac72f89 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Sat, 26 May 2012 23:26:43 -0500
+Subject: libceph: embed ceph connection structure in mon_client
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 67130934fb579fdf0f2f6d745960264378b57dc8)
+
+A monitor client has a pointer to a ceph connection structure in it.
+This is the only one of the three ceph client types that does it this
+way; the OSD and MDS clients embed the connection into their main
+structures.  There is always exactly one ceph connection for a
+monitor client, so there is no need to allocate it separate from the
+monitor client structure.
+
+So switch the ceph_mon_client structure to embed its
+ceph_connection structure.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/mon_client.h |    2 -
+ net/ceph/mon_client.c           |   47 +++++++++++++++++-----------------------
+ 2 files changed, 21 insertions(+), 28 deletions(-)
+
+--- a/include/linux/ceph/mon_client.h
++++ b/include/linux/ceph/mon_client.h
+@@ -70,7 +70,7 @@ struct ceph_mon_client {
+       bool hunting;
+       int cur_mon;                       /* last monitor i contacted */
+       unsigned long sub_sent, sub_renew_after;
+-      struct ceph_connection *con;
++      struct ceph_connection con;
+       bool have_fsid;
+       /* pending generic requests */
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -106,9 +106,9 @@ static void __send_prepared_auth_request
+       monc->pending_auth = 1;
+       monc->m_auth->front.iov_len = len;
+       monc->m_auth->hdr.front_len = cpu_to_le32(len);
+-      ceph_con_revoke(monc->con, monc->m_auth);
++      ceph_con_revoke(&monc->con, monc->m_auth);
+       ceph_msg_get(monc->m_auth);  /* keep our ref */
+-      ceph_con_send(monc->con, monc->m_auth);
++      ceph_con_send(&monc->con, monc->m_auth);
+ }
+ /*
+@@ -117,8 +117,8 @@ static void __send_prepared_auth_request
+ static void __close_session(struct ceph_mon_client *monc)
+ {
+       dout("__close_session closing mon%d\n", monc->cur_mon);
+-      ceph_con_revoke(monc->con, monc->m_auth);
+-      ceph_con_close(monc->con);
++      ceph_con_revoke(&monc->con, monc->m_auth);
++      ceph_con_close(&monc->con);
+       monc->cur_mon = -1;
+       monc->pending_auth = 0;
+       ceph_auth_reset(monc->auth);
+@@ -142,9 +142,9 @@ static int __open_session(struct ceph_mo
+               monc->want_next_osdmap = !!monc->want_next_osdmap;
+               dout("open_session mon%d opening\n", monc->cur_mon);
+-              monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON;
+-              monc->con->peer_name.num = cpu_to_le64(monc->cur_mon);
+-              ceph_con_open(monc->con,
++              monc->con.peer_name.type = CEPH_ENTITY_TYPE_MON;
++              monc->con.peer_name.num = cpu_to_le64(monc->cur_mon);
++              ceph_con_open(&monc->con,
+                             &monc->monmap->mon_inst[monc->cur_mon].addr);
+               /* initiatiate authentication handshake */
+@@ -226,8 +226,8 @@ static void __send_subscribe(struct ceph
+               msg->front.iov_len = p - msg->front.iov_base;
+               msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+-              ceph_con_revoke(monc->con, msg);
+-              ceph_con_send(monc->con, ceph_msg_get(msg));
++              ceph_con_revoke(&monc->con, msg);
++              ceph_con_send(&monc->con, ceph_msg_get(msg));
+               monc->sub_sent = jiffies | 1;  /* never 0 */
+       }
+@@ -247,7 +247,7 @@ static void handle_subscribe_ack(struct
+       if (monc->hunting) {
+               pr_info("mon%d %s session established\n",
+                       monc->cur_mon,
+-                      ceph_pr_addr(&monc->con->peer_addr.in_addr));
++                      ceph_pr_addr(&monc->con.peer_addr.in_addr));
+               monc->hunting = false;
+       }
+       dout("handle_subscribe_ack after %d seconds\n", seconds);
+@@ -461,7 +461,7 @@ static int do_generic_request(struct cep
+       req->request->hdr.tid = cpu_to_le64(req->tid);
+       __insert_generic_request(monc, req);
+       monc->num_generic_requests++;
+-      ceph_con_send(monc->con, ceph_msg_get(req->request));
++      ceph_con_send(&monc->con, ceph_msg_get(req->request));
+       mutex_unlock(&monc->mutex);
+       err = wait_for_completion_interruptible(&req->completion);
+@@ -684,8 +684,8 @@ static void __resend_generic_request(str
+       for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
+               req = rb_entry(p, struct ceph_mon_generic_request, node);
+-              ceph_con_revoke(monc->con, req->request);
+-              ceph_con_send(monc->con, ceph_msg_get(req->request));
++              ceph_con_revoke(&monc->con, req->request);
++              ceph_con_send(&monc->con, ceph_msg_get(req->request));
+       }
+ }
+@@ -705,7 +705,7 @@ static void delayed_work(struct work_str
+               __close_session(monc);
+               __open_session(monc);  /* continue hunting */
+       } else {
+-              ceph_con_keepalive(monc->con);
++              ceph_con_keepalive(&monc->con);
+               __validate_auth(monc);
+@@ -760,19 +760,16 @@ int ceph_monc_init(struct ceph_mon_clien
+               goto out;
+       /* connection */
+-      monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
+-      if (!monc->con)
+-              goto out_monmap;
+-      ceph_con_init(&monc->client->msgr, monc->con);
+-      monc->con->private = monc;
+-      monc->con->ops = &mon_con_ops;
++      ceph_con_init(&monc->client->msgr, &monc->con);
++      monc->con.private = monc;
++      monc->con.ops = &mon_con_ops;
+       /* authentication */
+       monc->auth = ceph_auth_init(cl->options->name,
+                                   cl->options->key);
+       if (IS_ERR(monc->auth)) {
+               err = PTR_ERR(monc->auth);
+-              goto out_con;
++              goto out_monmap;
+       }
+       monc->auth->want_keys =
+               CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON |
+@@ -824,8 +821,6 @@ out_subscribe_ack:
+       ceph_msg_put(monc->m_subscribe_ack);
+ out_auth:
+       ceph_auth_destroy(monc->auth);
+-out_con:
+-      monc->con->ops->put(monc->con);
+ out_monmap:
+       kfree(monc->monmap);
+ out:
+@@ -841,9 +836,7 @@ void ceph_monc_stop(struct ceph_mon_clie
+       mutex_lock(&monc->mutex);
+       __close_session(monc);
+-      monc->con->private = NULL;
+-      monc->con->ops->put(monc->con);
+-      monc->con = NULL;
++      monc->con.private = NULL;
+       mutex_unlock(&monc->mutex);
+@@ -1029,7 +1022,7 @@ static void mon_fault(struct ceph_connec
+       if (!monc->hunting)
+               pr_info("mon%d %s session lost, "
+                       "hunting for new mon\n", monc->cur_mon,
+-                      ceph_pr_addr(&monc->con->peer_addr.in_addr));
++                      ceph_pr_addr(&monc->con.peer_addr.in_addr));
+       __close_session(monc);
+       if (!monc->hunting) {
diff --git a/queue-3.4/0048-libceph-drop-connection-refcounting-for-mon_client.patch b/queue-3.4/0048-libceph-drop-connection-refcounting-for-mon_client.patch
new file mode 100644 (file)
index 0000000..a8dfae4
--- /dev/null
@@ -0,0 +1,48 @@
+From 646a893f1d8346dc1b2826c684de99e5df37d5ed Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Thu, 31 May 2012 20:27:50 -0700
+Subject: libceph: drop connection refcounting for mon_client
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit ec87ef4309d33bd9c87a53bb5152a86ae7a65f25)
+
+All references to the embedded ceph_connection come from the msgr
+workqueue, which is drained prior to mon_client destruction.  That
+means we can ignore con refcounting entirely.
+
+Signed-off-by: Sage Weil <sage@newdream.net>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/mon_client.c |   18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -1037,9 +1037,23 @@ out:
+       mutex_unlock(&monc->mutex);
+ }
++/*
++ * We can ignore refcounting on the connection struct, as all references
++ * will come from the messenger workqueue, which is drained prior to
++ * mon_client destruction.
++ */
++static struct ceph_connection *con_get(struct ceph_connection *con)
++{
++      return con;
++}
++
++static void con_put(struct ceph_connection *con)
++{
++}
++
+ static const struct ceph_connection_operations mon_con_ops = {
+-      .get = ceph_con_get,
+-      .put = ceph_con_put,
++      .get = con_get,
++      .put = con_put,
+       .dispatch = dispatch,
+       .fault = mon_fault,
+       .alloc_msg = mon_alloc_msg,
diff --git a/queue-3.4/0049-libceph-init-monitor-connection-when-opening.patch b/queue-3.4/0049-libceph-init-monitor-connection-when-opening.patch
new file mode 100644 (file)
index 0000000..b1050da
--- /dev/null
@@ -0,0 +1,64 @@
+From efea1a38cd969f4b52ec5cc468d8143bb496efc2 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Sat, 26 May 2012 23:26:43 -0500
+Subject: libceph: init monitor connection when opening
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 20581c1faf7b15ae1f8b80c0ec757877b0b53151)
+
+Hold off initializing a monitor client's connection until just
+before it gets opened for use.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/mon_client.c |   13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -119,6 +119,7 @@ static void __close_session(struct ceph_
+       dout("__close_session closing mon%d\n", monc->cur_mon);
+       ceph_con_revoke(&monc->con, monc->m_auth);
+       ceph_con_close(&monc->con);
++      monc->con.private = NULL;
+       monc->cur_mon = -1;
+       monc->pending_auth = 0;
+       ceph_auth_reset(monc->auth);
+@@ -141,9 +142,13 @@ static int __open_session(struct ceph_mo
+               monc->sub_renew_after = jiffies;  /* i.e., expired */
+               monc->want_next_osdmap = !!monc->want_next_osdmap;
+-              dout("open_session mon%d opening\n", monc->cur_mon);
++              ceph_con_init(&monc->client->msgr, &monc->con);
++              monc->con.private = monc;
++              monc->con.ops = &mon_con_ops;
+               monc->con.peer_name.type = CEPH_ENTITY_TYPE_MON;
+               monc->con.peer_name.num = cpu_to_le64(monc->cur_mon);
++
++              dout("open_session mon%d opening\n", monc->cur_mon);
+               ceph_con_open(&monc->con,
+                             &monc->monmap->mon_inst[monc->cur_mon].addr);
+@@ -760,10 +765,6 @@ int ceph_monc_init(struct ceph_mon_clien
+               goto out;
+       /* connection */
+-      ceph_con_init(&monc->client->msgr, &monc->con);
+-      monc->con.private = monc;
+-      monc->con.ops = &mon_con_ops;
+-
+       /* authentication */
+       monc->auth = ceph_auth_init(cl->options->name,
+                                   cl->options->key);
+@@ -836,8 +837,6 @@ void ceph_monc_stop(struct ceph_mon_clie
+       mutex_lock(&monc->mutex);
+       __close_session(monc);
+-      monc->con.private = NULL;
+-
+       mutex_unlock(&monc->mutex);
+       /*
diff --git a/queue-3.4/0050-libceph-fully-initialize-connection-in-con_init.patch b/queue-3.4/0050-libceph-fully-initialize-connection-in-con_init.patch
new file mode 100644 (file)
index 0000000..82ec588
--- /dev/null
@@ -0,0 +1,116 @@
+From 449c48298a24fb8a48f6e0574f098ddfe6c81325 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Sat, 26 May 2012 23:26:43 -0500
+Subject: libceph: fully initialize connection in con_init()
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 1bfd89f4e6e1adc6a782d94aa5d4c53be1e404d7)
+
+Move the initialization of a ceph connection's private pointer,
+operations vector pointer, and peer name information into
+ceph_con_init().  Rearrange the arguments so the connection pointer
+is first.  Hide the byte-swapping of the peer entity number inside
+ceph_con_init().
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/mds_client.c           |    7 ++-----
+ include/linux/ceph/messenger.h |    6 ++++--
+ net/ceph/messenger.c           |    9 ++++++++-
+ net/ceph/mon_client.c          |    8 +++-----
+ net/ceph/osd_client.c          |    7 ++-----
+ 5 files changed, 19 insertions(+), 18 deletions(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -394,11 +394,8 @@ static struct ceph_mds_session *register
+       s->s_seq = 0;
+       mutex_init(&s->s_mutex);
+-      ceph_con_init(&mdsc->fsc->client->msgr, &s->s_con);
+-      s->s_con.private = s;
+-      s->s_con.ops = &mds_con_ops;
+-      s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
+-      s->s_con.peer_name.num = cpu_to_le64(mds);
++      ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr,
++              CEPH_ENTITY_TYPE_MDS, mds);
+       spin_lock_init(&s->s_gen_ttl_lock);
+       s->s_cap_gen = 0;
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -219,8 +219,10 @@ extern void ceph_messenger_init(struct c
+                       u32 required_features,
+                       bool nocrc);
+-extern void ceph_con_init(struct ceph_messenger *msgr,
+-                        struct ceph_connection *con);
++extern void ceph_con_init(struct ceph_connection *con, void *private,
++                      const struct ceph_connection_operations *ops,
++                      struct ceph_messenger *msgr, __u8 entity_type,
++                      __u64 entity_num);
+ extern void ceph_con_open(struct ceph_connection *con,
+                         struct ceph_entity_addr *addr);
+ extern bool ceph_con_opened(struct ceph_connection *con);
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -521,15 +521,22 @@ void ceph_con_put(struct ceph_connection
+ /*
+  * initialize a new connection.
+  */
+-void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
++void ceph_con_init(struct ceph_connection *con, void *private,
++      const struct ceph_connection_operations *ops,
++      struct ceph_messenger *msgr, __u8 entity_type, __u64 entity_num)
+ {
+       dout("con_init %p\n", con);
+       memset(con, 0, sizeof(*con));
++      con->private = private;
++      con->ops = ops;
+       atomic_set(&con->nref, 1);
+       con->msgr = msgr;
+       con_sock_state_init(con);
++      con->peer_name.type = (__u8) entity_type;
++      con->peer_name.num = cpu_to_le64(entity_num);
++
+       mutex_init(&con->mutex);
+       INIT_LIST_HEAD(&con->out_queue);
+       INIT_LIST_HEAD(&con->out_sent);
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -142,11 +142,9 @@ static int __open_session(struct ceph_mo
+               monc->sub_renew_after = jiffies;  /* i.e., expired */
+               monc->want_next_osdmap = !!monc->want_next_osdmap;
+-              ceph_con_init(&monc->client->msgr, &monc->con);
+-              monc->con.private = monc;
+-              monc->con.ops = &mon_con_ops;
+-              monc->con.peer_name.type = CEPH_ENTITY_TYPE_MON;
+-              monc->con.peer_name.num = cpu_to_le64(monc->cur_mon);
++              ceph_con_init(&monc->con, monc, &mon_con_ops,
++                      &monc->client->msgr,
++                      CEPH_ENTITY_TYPE_MON, monc->cur_mon);
+               dout("open_session mon%d opening\n", monc->cur_mon);
+               ceph_con_open(&monc->con,
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -640,11 +640,8 @@ static struct ceph_osd *create_osd(struc
+       INIT_LIST_HEAD(&osd->o_osd_lru);
+       osd->o_incarnation = 1;
+-      ceph_con_init(&osdc->client->msgr, &osd->o_con);
+-      osd->o_con.private = osd;
+-      osd->o_con.ops = &osd_con_ops;
+-      osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
+-      osd->o_con.peer_name.num = cpu_to_le64(onum);
++      ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr,
++              CEPH_ENTITY_TYPE_OSD, onum);
+       INIT_LIST_HEAD(&osd->o_keepalive_item);
+       return osd;
diff --git a/queue-3.4/0051-libceph-tweak-ceph_alloc_msg.patch b/queue-3.4/0051-libceph-tweak-ceph_alloc_msg.patch
new file mode 100644 (file)
index 0000000..00c620f
--- /dev/null
@@ -0,0 +1,171 @@
+From 3b865e1b62c6088a110cad70ea725f65775c0c5e Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 4 Jun 2012 14:43:32 -0500
+Subject: libceph: tweak ceph_alloc_msg()
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 1c20f2d26795803fc4f5155fe4fca5717a5944b6)
+
+The function ceph_alloc_msg() is only used to allocate a message
+that will be assigned to a connection's in_msg pointer.  Rename the
+function so this implied usage is more clear.
+
+In addition, make that assignment inside the function (again, since
+that's precisely what it's intended to be used for).  This allows us
+to return what is now provided via the passed-in address of a "skip"
+variable.  The return type is now Boolean to be explicit that there
+are only two possible outcomes.
+
+Make sure the result of an ->alloc_msg method call always sets the
+value of *skip properly.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c  |   61 +++++++++++++++++++++++++++-----------------------
+ net/ceph/mon_client.c |    3 ++
+ net/ceph/osd_client.c |    1 
+ 3 files changed, 38 insertions(+), 27 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1659,9 +1659,8 @@ static int read_partial_message_section(
+       return 1;
+ }
+-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
+-                              struct ceph_msg_header *hdr,
+-                              int *skip);
++static bool ceph_con_in_msg_alloc(struct ceph_connection *con,
++                              struct ceph_msg_header *hdr);
+ static int read_partial_message_pages(struct ceph_connection *con,
+@@ -1744,7 +1743,6 @@ static int read_partial_message(struct c
+       int ret;
+       unsigned front_len, middle_len, data_len;
+       bool do_datacrc = !con->msgr->nocrc;
+-      int skip;
+       u64 seq;
+       u32 crc;
+@@ -1797,9 +1795,7 @@ static int read_partial_message(struct c
+       if (!con->in_msg) {
+               dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
+                    con->in_hdr.front_len, con->in_hdr.data_len);
+-              skip = 0;
+-              con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
+-              if (skip) {
++              if (ceph_con_in_msg_alloc(con, &con->in_hdr)) {
+                       /* skip this message */
+                       dout("alloc_msg said skip message\n");
+                       BUG_ON(con->in_msg);
+@@ -2581,46 +2577,57 @@ static int ceph_alloc_middle(struct ceph
+ }
+ /*
+- * Generic message allocator, for incoming messages.
++ * Allocate a message for receiving an incoming message on a
++ * connection, and save the result in con->in_msg.  Uses the
++ * connection's private alloc_msg op if available.
++ *
++ * Returns true if the message should be skipped, false otherwise.
++ * If true is returned (skip message), con->in_msg will be NULL.
++ * If false is returned, con->in_msg will contain a pointer to the
++ * newly-allocated message, or NULL in case of memory exhaustion.
+  */
+-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
+-                              struct ceph_msg_header *hdr,
+-                              int *skip)
++static bool ceph_con_in_msg_alloc(struct ceph_connection *con,
++                              struct ceph_msg_header *hdr)
+ {
+       int type = le16_to_cpu(hdr->type);
+       int front_len = le32_to_cpu(hdr->front_len);
+       int middle_len = le32_to_cpu(hdr->middle_len);
+-      struct ceph_msg *msg = NULL;
+       int ret;
++      BUG_ON(con->in_msg != NULL);
++
+       if (con->ops->alloc_msg) {
++              int skip = 0;
++
+               mutex_unlock(&con->mutex);
+-              msg = con->ops->alloc_msg(con, hdr, skip);
++              con->in_msg = con->ops->alloc_msg(con, hdr, &skip);
+               mutex_lock(&con->mutex);
+-              if (!msg || *skip)
+-                      return NULL;
++              if (skip)
++                      con->in_msg = NULL;
++
++              if (!con->in_msg)
++                      return skip != 0;
+       }
+-      if (!msg) {
+-              *skip = 0;
+-              msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
+-              if (!msg) {
++      if (!con->in_msg) {
++              con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
++              if (!con->in_msg) {
+                       pr_err("unable to allocate msg type %d len %d\n",
+                              type, front_len);
+-                      return NULL;
++                      return false;
+               }
+-              msg->page_alignment = le16_to_cpu(hdr->data_off);
++              con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
+       }
+-      memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
++      memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
+-      if (middle_len && !msg->middle) {
+-              ret = ceph_alloc_middle(con, msg);
++      if (middle_len && !con->in_msg->middle) {
++              ret = ceph_alloc_middle(con, con->in_msg);
+               if (ret < 0) {
+-                      ceph_msg_put(msg);
+-                      return NULL;
++                      ceph_msg_put(con->in_msg);
++                      con->in_msg = NULL;
+               }
+       }
+-      return msg;
++      return false;
+ }
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -442,6 +442,7 @@ static struct ceph_msg *get_generic_repl
+               m = NULL;
+       } else {
+               dout("get_generic_reply %lld got %p\n", tid, req->reply);
++              *skip = 0;
+               m = ceph_msg_get(req->reply);
+               /*
+                * we don't need to track the connection reading into
+@@ -990,6 +991,8 @@ static struct ceph_msg *mon_alloc_msg(st
+       case CEPH_MSG_MDS_MAP:
+       case CEPH_MSG_OSD_MAP:
+               m = ceph_msg_new(type, front_len, GFP_NOFS, false);
++              if (!m)
++                      return NULL;    /* ENOMEM--return skip == 0 */
+               break;
+       }
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -2077,6 +2077,7 @@ static struct ceph_msg *alloc_msg(struct
+       int type = le16_to_cpu(hdr->type);
+       int front = le32_to_cpu(hdr->front_len);
++      *skip = 0;
+       switch (type) {
+       case CEPH_MSG_OSD_MAP:
+       case CEPH_MSG_WATCH_NOTIFY:
diff --git a/queue-3.4/0052-libceph-have-messages-point-to-their-connection.patch b/queue-3.4/0052-libceph-have-messages-point-to-their-connection.patch
new file mode 100644 (file)
index 0000000..d76394c
--- /dev/null
@@ -0,0 +1,154 @@
+From 48588cda937cf200d7cf89fbb74e59449a389de8 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Fri, 1 Jun 2012 14:56:43 -0500
+Subject: libceph: have messages point to their connection
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 38941f8031bf042dba3ced6394ba3a3b16c244ea)
+
+When a ceph message is queued for sending it is placed on a list of
+pending messages (ceph_connection->out_queue).  When they are
+actually sent over the wire, they are moved from that list to
+another (ceph_connection->out_sent).  When acknowledgement for the
+message is received, it is removed from the sent messages list.
+
+During that entire time the message is "in the possession" of a
+single ceph connection.  Keep track of that connection in the
+message.  This will be used in the next patch (and is a helpful
+bit of information for debugging anyway).
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h |    3 +++
+ net/ceph/messenger.c           |   27 +++++++++++++++++++++++++--
+ 2 files changed, 28 insertions(+), 2 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -77,7 +77,10 @@ struct ceph_msg {
+       unsigned nr_pages;              /* size of page array */
+       unsigned page_alignment;        /* io offset in first page */
+       struct ceph_pagelist *pagelist; /* instead of pages */
++
++      struct ceph_connection *con;
+       struct list_head list_head;
++
+       struct kref kref;
+       struct bio  *bio;               /* instead of pages/pagelist */
+       struct bio  *bio_iter;          /* bio iterator */
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -414,6 +414,9 @@ static int con_close_socket(struct ceph_
+ static void ceph_msg_remove(struct ceph_msg *msg)
+ {
+       list_del_init(&msg->list_head);
++      BUG_ON(msg->con == NULL);
++      msg->con = NULL;
++
+       ceph_msg_put(msg);
+ }
+ static void ceph_msg_remove_list(struct list_head *head)
+@@ -433,6 +436,8 @@ static void reset_connection(struct ceph
+       ceph_msg_remove_list(&con->out_sent);
+       if (con->in_msg) {
++              BUG_ON(con->in_msg->con != con);
++              con->in_msg->con = NULL;
+               ceph_msg_put(con->in_msg);
+               con->in_msg = NULL;
+       }
+@@ -625,8 +630,10 @@ static void prepare_write_message(struct
+                       &con->out_temp_ack);
+       }
++      BUG_ON(list_empty(&con->out_queue));
+       m = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
+       con->out_msg = m;
++      BUG_ON(m->con != con);
+       /* put message on sent list */
+       ceph_msg_get(m);
+@@ -1810,6 +1817,8 @@ static int read_partial_message(struct c
+                               "error allocating memory for incoming message";
+                       return -ENOMEM;
+               }
++
++              BUG_ON(con->in_msg->con != con);
+               m = con->in_msg;
+               m->front.iov_len = 0;    /* haven't read it yet */
+               if (m->middle)
+@@ -1905,6 +1914,8 @@ static void process_message(struct ceph_
+ {
+       struct ceph_msg *msg;
++      BUG_ON(con->in_msg->con != con);
++      con->in_msg->con = NULL;
+       msg = con->in_msg;
+       con->in_msg = NULL;
+@@ -2264,6 +2275,8 @@ static void ceph_fault(struct ceph_conne
+       con_close_socket(con);
+       if (con->in_msg) {
++              BUG_ON(con->in_msg->con != con);
++              con->in_msg->con = NULL;
+               ceph_msg_put(con->in_msg);
+               con->in_msg = NULL;
+       }
+@@ -2382,6 +2395,8 @@ void ceph_con_send(struct ceph_connectio
+       /* queue */
+       mutex_lock(&con->mutex);
++      BUG_ON(msg->con != NULL);
++      msg->con = con;
+       BUG_ON(!list_empty(&msg->list_head));
+       list_add_tail(&msg->list_head, &con->out_queue);
+       dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
+@@ -2407,13 +2422,16 @@ void ceph_con_revoke(struct ceph_connect
+ {
+       mutex_lock(&con->mutex);
+       if (!list_empty(&msg->list_head)) {
+-              dout("con_revoke %p msg %p - was on queue\n", con, msg);
++              dout("%s %p msg %p - was on queue\n", __func__, con, msg);
+               list_del_init(&msg->list_head);
++              BUG_ON(msg->con == NULL);
++              msg->con = NULL;
++
+               ceph_msg_put(msg);
+               msg->hdr.seq = 0;
+       }
+       if (con->out_msg == msg) {
+-              dout("con_revoke %p msg %p - was sending\n", con, msg);
++              dout("%s %p msg %p - was sending\n", __func__, con, msg);
+               con->out_msg = NULL;
+               if (con->out_kvec_is_msg) {
+                       con->out_skip = con->out_kvec_bytes;
+@@ -2482,6 +2500,8 @@ struct ceph_msg *ceph_msg_new(int type,
+       if (m == NULL)
+               goto out;
+       kref_init(&m->kref);
++
++      m->con = NULL;
+       INIT_LIST_HEAD(&m->list_head);
+       m->hdr.tid = 0;
+@@ -2602,6 +2622,8 @@ static bool ceph_con_in_msg_alloc(struct
+               mutex_unlock(&con->mutex);
+               con->in_msg = con->ops->alloc_msg(con, hdr, &skip);
+               mutex_lock(&con->mutex);
++              if (con->in_msg)
++                      con->in_msg->con = con;
+               if (skip)
+                       con->in_msg = NULL;
+@@ -2615,6 +2637,7 @@ static bool ceph_con_in_msg_alloc(struct
+                              type, front_len);
+                       return false;
+               }
++              con->in_msg->con = con;
+               con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
+       }
+       memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
diff --git a/queue-3.4/0053-libceph-have-messages-take-a-connection-reference.patch b/queue-3.4/0053-libceph-have-messages-take-a-connection-reference.patch
new file mode 100644 (file)
index 0000000..88242d4
--- /dev/null
@@ -0,0 +1,174 @@
+From 898bf051066aaecc79487425b6614fb8e0efca4a Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 4 Jun 2012 14:43:33 -0500
+Subject: libceph: have messages take a connection reference
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 92ce034b5a740046cc643a21ea21eaad589e0043)
+
+There are essentially two types of ceph messages: incoming and
+outgoing.  Outgoing messages are always allocated via ceph_msg_new(),
+and at the time of their allocation they are not associated with any
+particular connection.  Incoming messages are always allocated via
+ceph_con_in_msg_alloc(), and they are initially associated with the
+connection from which incoming data will be placed into the message.
+
+When an outgoing message gets sent, it becomes associated with a
+connection and remains that way until the message is successfully
+sent.  The association of an incoming message goes away at the point
+it is sent to an upper layer via a con->ops->dispatch method.
+
+This patch implements reference counting for all ceph messages, such
+that every message holds a reference (and a pointer) to a connection
+if and only if it is associated with that connection (as described
+above).
+
+For background, here is an explanation of the ceph message
+lifecycle, emphasizing when an association exists between a message
+and a connection.
+
+Outgoing Messages
+An outgoing message is "owned" by its allocator, from the time it is
+allocated in ceph_msg_new() up to the point it gets queued for
+sending in ceph_con_send().  Prior to that point the message's
+msg->con pointer is null; at the point it is queued for sending, that
+pointer is assigned to refer to the connection.  At that
+time the message is inserted into a connection's out_queue list.
+
+When a message on the out_queue list has been sent to the socket
+layer to be put on the wire, it is transferred out of that list and
+into the connection's out_sent list.  At that point it is still owned
+by the connection, and will remain so until an acknowledgement is
+received from the recipient that indicates the message was
+successfully transferred.  When such an acknowledgement is received
+(in process_ack()), the message is removed from its list (in
+ceph_msg_remove()), at which point it is no longer associated with
+the connection.
+
+So basically, any time a message is on one of a connection's lists,
+it is associated with that connection.  Reference counting outgoing
+messages can thus be done at the points a message is added to the
+out_queue (in ceph_con_send()) and the point it is removed from
+either of its two lists (in ceph_msg_remove())--at which point its
+connection pointer becomes null.
+
+Incoming Messages
+When an incoming message on a connection is getting read (in
+read_partial_message()) and there is no message in con->in_msg,
+a new one is allocated using ceph_con_in_msg_alloc().  At that
+point the message is associated with the connection.  Once that
+message has been completely and successfully read, it is passed to
+upper layer code using the connection's con->ops->dispatch method.
+At that point the association between the message and the connection
+no longer exists.
+
+Reference counting of connections for incoming messages can be done
+by taking a reference to the connection when the message gets
+allocated, and releasing that reference when it gets handed off
+using the dispatch method.
+
+We should never fail to get a connection reference for a
+message, since the caller should already hold one.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   24 ++++++++++++++++++------
+ 1 file changed, 18 insertions(+), 6 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -415,6 +415,7 @@ static void ceph_msg_remove(struct ceph_
+ {
+       list_del_init(&msg->list_head);
+       BUG_ON(msg->con == NULL);
++      ceph_con_put(msg->con);
+       msg->con = NULL;
+       ceph_msg_put(msg);
+@@ -440,6 +441,7 @@ static void reset_connection(struct ceph
+               con->in_msg->con = NULL;
+               ceph_msg_put(con->in_msg);
+               con->in_msg = NULL;
++              ceph_con_put(con);
+       }
+       con->connect_seq = 0;
+@@ -1918,6 +1920,7 @@ static void process_message(struct ceph_
+       con->in_msg->con = NULL;
+       msg = con->in_msg;
+       con->in_msg = NULL;
++      ceph_con_put(con);
+       /* if first message, set peer_name */
+       if (con->peer_name.type == 0)
+@@ -2279,6 +2282,7 @@ static void ceph_fault(struct ceph_conne
+               con->in_msg->con = NULL;
+               ceph_msg_put(con->in_msg);
+               con->in_msg = NULL;
++              ceph_con_put(con);
+       }
+       /* Requeue anything that hasn't been acked */
+@@ -2395,8 +2399,11 @@ void ceph_con_send(struct ceph_connectio
+       /* queue */
+       mutex_lock(&con->mutex);
++
+       BUG_ON(msg->con != NULL);
+-      msg->con = con;
++      msg->con = ceph_con_get(con);
++      BUG_ON(msg->con == NULL);
++
+       BUG_ON(!list_empty(&msg->list_head));
+       list_add_tail(&msg->list_head, &con->out_queue);
+       dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
+@@ -2425,10 +2432,11 @@ void ceph_con_revoke(struct ceph_connect
+               dout("%s %p msg %p - was on queue\n", __func__, con, msg);
+               list_del_init(&msg->list_head);
+               BUG_ON(msg->con == NULL);
++              ceph_con_put(msg->con);
+               msg->con = NULL;
++              msg->hdr.seq = 0;
+               ceph_msg_put(msg);
+-              msg->hdr.seq = 0;
+       }
+       if (con->out_msg == msg) {
+               dout("%s %p msg %p - was sending\n", __func__, con, msg);
+@@ -2437,8 +2445,9 @@ void ceph_con_revoke(struct ceph_connect
+                       con->out_skip = con->out_kvec_bytes;
+                       con->out_kvec_is_msg = false;
+               }
+-              ceph_msg_put(msg);
+               msg->hdr.seq = 0;
++
++              ceph_msg_put(msg);
+       }
+       mutex_unlock(&con->mutex);
+ }
+@@ -2622,8 +2631,10 @@ static bool ceph_con_in_msg_alloc(struct
+               mutex_unlock(&con->mutex);
+               con->in_msg = con->ops->alloc_msg(con, hdr, &skip);
+               mutex_lock(&con->mutex);
+-              if (con->in_msg)
+-                      con->in_msg->con = con;
++              if (con->in_msg) {
++                      con->in_msg->con = ceph_con_get(con);
++                      BUG_ON(con->in_msg->con == NULL);
++              }
+               if (skip)
+                       con->in_msg = NULL;
+@@ -2637,7 +2648,8 @@ static bool ceph_con_in_msg_alloc(struct
+                              type, front_len);
+                       return false;
+               }
+-              con->in_msg->con = con;
++              con->in_msg->con = ceph_con_get(con);
++              BUG_ON(con->in_msg->con == NULL);
+               con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
+       }
+       memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
diff --git a/queue-3.4/0054-libceph-make-ceph_con_revoke-a-msg-operation.patch b/queue-3.4/0054-libceph-make-ceph_con_revoke-a-msg-operation.patch
new file mode 100644 (file)
index 0000000..783fa3f
--- /dev/null
@@ -0,0 +1,121 @@
+From 0ca876aefb93f1bda1b194af80ed90def4a21768 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Fri, 1 Jun 2012 14:56:43 -0500
+Subject: libceph: make ceph_con_revoke() a msg operation
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 6740a845b2543cc46e1902ba21bac743fbadd0dc)
+
+ceph_con_revoke() is passed both a message and a ceph connection.
+Now that any message associated with a connection holds a pointer
+to that connection, there's no need to provide the connection when
+revoking a message.
+
+This has the added benefit of precluding the possibility of
+providing the wrong connection pointer.  If the message's connection
+pointer is null, it is not being tracked by any connection, so
+revoking it is a no-op.  This is supported as a convenience for
+upper layers, so they can revoke a message that is not actually
+"in flight."
+
+Rename the function ceph_msg_revoke() to reflect that it is really
+an operation on a message, not a connection.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h |    3 ++-
+ net/ceph/messenger.c           |    7 ++++++-
+ net/ceph/mon_client.c          |    8 ++++----
+ net/ceph/osd_client.c          |    4 ++--
+ 4 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -231,7 +231,8 @@ extern void ceph_con_open(struct ceph_co
+ extern bool ceph_con_opened(struct ceph_connection *con);
+ extern void ceph_con_close(struct ceph_connection *con);
+ extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
+-extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
++
++extern void ceph_msg_revoke(struct ceph_msg *msg);
+ extern void ceph_con_revoke_message(struct ceph_connection *con,
+                                 struct ceph_msg *msg);
+ extern void ceph_con_keepalive(struct ceph_connection *con);
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2425,8 +2425,13 @@ EXPORT_SYMBOL(ceph_con_send);
+ /*
+  * Revoke a message that was previously queued for send
+  */
+-void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
++void ceph_msg_revoke(struct ceph_msg *msg)
+ {
++      struct ceph_connection *con = msg->con;
++
++      if (!con)
++              return;         /* Message not in our possession */
++
+       mutex_lock(&con->mutex);
+       if (!list_empty(&msg->list_head)) {
+               dout("%s %p msg %p - was on queue\n", __func__, con, msg);
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -106,7 +106,7 @@ static void __send_prepared_auth_request
+       monc->pending_auth = 1;
+       monc->m_auth->front.iov_len = len;
+       monc->m_auth->hdr.front_len = cpu_to_le32(len);
+-      ceph_con_revoke(&monc->con, monc->m_auth);
++      ceph_msg_revoke(monc->m_auth);
+       ceph_msg_get(monc->m_auth);  /* keep our ref */
+       ceph_con_send(&monc->con, monc->m_auth);
+ }
+@@ -117,7 +117,7 @@ static void __send_prepared_auth_request
+ static void __close_session(struct ceph_mon_client *monc)
+ {
+       dout("__close_session closing mon%d\n", monc->cur_mon);
+-      ceph_con_revoke(&monc->con, monc->m_auth);
++      ceph_msg_revoke(monc->m_auth);
+       ceph_con_close(&monc->con);
+       monc->con.private = NULL;
+       monc->cur_mon = -1;
+@@ -229,7 +229,7 @@ static void __send_subscribe(struct ceph
+               msg->front.iov_len = p - msg->front.iov_base;
+               msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+-              ceph_con_revoke(&monc->con, msg);
++              ceph_msg_revoke(msg);
+               ceph_con_send(&monc->con, ceph_msg_get(msg));
+               monc->sub_sent = jiffies | 1;  /* never 0 */
+@@ -688,7 +688,7 @@ static void __resend_generic_request(str
+       for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
+               req = rb_entry(p, struct ceph_mon_generic_request, node);
+-              ceph_con_revoke(&monc->con, req->request);
++              ceph_msg_revoke(req->request);
+               ceph_con_send(&monc->con, ceph_msg_get(req->request));
+       }
+ }
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -852,7 +852,7 @@ static void __unregister_request(struct
+       if (req->r_osd) {
+               /* make sure the original request isn't in flight. */
+-              ceph_con_revoke(&req->r_osd->o_con, req->r_request);
++              ceph_msg_revoke(req->r_request);
+               list_del_init(&req->r_osd_item);
+               if (list_empty(&req->r_osd->o_requests) &&
+@@ -879,7 +879,7 @@ static void __unregister_request(struct
+ static void __cancel_request(struct ceph_osd_request *req)
+ {
+       if (req->r_sent && req->r_osd) {
+-              ceph_con_revoke(&req->r_osd->o_con, req->r_request);
++              ceph_msg_revoke(req->r_request);
+               req->r_sent = 0;
+       }
+ }
diff --git a/queue-3.4/0055-libceph-make-ceph_con_revoke_message-a-msg-op.patch b/queue-3.4/0055-libceph-make-ceph_con_revoke_message-a-msg-op.patch
new file mode 100644 (file)
index 0000000..4c9cee0
--- /dev/null
@@ -0,0 +1,120 @@
+From ede3074a05d4669f799c8887291c940e8f86849b Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Fri, 1 Jun 2012 14:56:43 -0500
+Subject: libceph: make ceph_con_revoke_message() a msg op
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 8921d114f5574c6da2cdd00749d185633ecf88f3)
+
+ceph_con_revoke_message() is passed both a message and a ceph
+connection.  A ceph_msg allocated for incoming messages on a
+connection always has a pointer to that connection, so there's no
+need to provide the connection when revoking such a message.
+
+Note that the existing logic does not preclude the message supplied
+being a null/bogus message pointer.  The only user of this interface
+is the OSD client, and the only value an osd client passes is a
+request's r_reply field.  That is always non-null (except briefly in
+an error path in ceph_osdc_alloc_request(), and that drops the
+only reference so the request won't ever have a reply to revoke).
+So we can safely assume the passed-in message is non-null, but add a
+BUG_ON() to make it very obvious we are imposing this restriction.
+
+Rename the function ceph_msg_revoke_incoming() to reflect that it is
+really an operation on an incoming message.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h |    4 ++--
+ net/ceph/messenger.c           |   22 ++++++++++++++++------
+ net/ceph/osd_client.c          |    9 ++++-----
+ 3 files changed, 22 insertions(+), 13 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -233,8 +233,8 @@ extern void ceph_con_close(struct ceph_c
+ extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
+ extern void ceph_msg_revoke(struct ceph_msg *msg);
+-extern void ceph_con_revoke_message(struct ceph_connection *con,
+-                                struct ceph_msg *msg);
++extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
++
+ extern void ceph_con_keepalive(struct ceph_connection *con);
+ extern struct ceph_connection *ceph_con_get(struct ceph_connection *con);
+ extern void ceph_con_put(struct ceph_connection *con);
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2460,17 +2460,27 @@ void ceph_msg_revoke(struct ceph_msg *ms
+ /*
+  * Revoke a message that we may be reading data into
+  */
+-void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
++void ceph_msg_revoke_incoming(struct ceph_msg *msg)
+ {
++      struct ceph_connection *con;
++
++      BUG_ON(msg == NULL);
++      if (!msg->con) {
++              dout("%s msg %p null con\n", __func__, msg);
++
++              return;         /* Message not in our possession */
++      }
++
++      con = msg->con;
+       mutex_lock(&con->mutex);
+-      if (con->in_msg && con->in_msg == msg) {
++      if (con->in_msg == msg) {
+               unsigned front_len = le32_to_cpu(con->in_hdr.front_len);
+               unsigned middle_len = le32_to_cpu(con->in_hdr.middle_len);
+               unsigned data_len = le32_to_cpu(con->in_hdr.data_len);
+               /* skip rest of message */
+-              dout("con_revoke_pages %p msg %p revoked\n", con, msg);
+-                      con->in_base_pos = con->in_base_pos -
++              dout("%s %p msg %p revoked\n", __func__, con, msg);
++              con->in_base_pos = con->in_base_pos -
+                               sizeof(struct ceph_msg_header) -
+                               front_len -
+                               middle_len -
+@@ -2481,8 +2491,8 @@ void ceph_con_revoke_message(struct ceph
+               con->in_tag = CEPH_MSGR_TAG_READY;
+               con->in_seq++;
+       } else {
+-              dout("con_revoke_pages %p msg %p pages %p no-op\n",
+-                   con, con->in_msg, msg);
++              dout("%s %p in_msg %p msg %p no-op\n",
++                   __func__, con, con->in_msg, msg);
+       }
+       mutex_unlock(&con->mutex);
+ }
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -140,10 +140,9 @@ void ceph_osdc_release_request(struct kr
+       if (req->r_request)
+               ceph_msg_put(req->r_request);
+       if (req->r_con_filling_msg) {
+-              dout("release_request revoking pages %p from con %p\n",
++              dout("%s revoking pages %p from con %p\n", __func__,
+                    req->r_pages, req->r_con_filling_msg);
+-              ceph_con_revoke_message(req->r_con_filling_msg,
+-                                    req->r_reply);
++              ceph_msg_revoke_incoming(req->r_reply);
+               req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
+       }
+       if (req->r_reply)
+@@ -2022,9 +2021,9 @@ static struct ceph_msg *get_reply(struct
+       }
+       if (req->r_con_filling_msg) {
+-              dout("get_reply revoking msg %p from old con %p\n",
++              dout("%s revoking msg %p from old con %p\n", __func__,
+                    req->r_reply, req->r_con_filling_msg);
+-              ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply);
++              ceph_msg_revoke_incoming(req->r_reply);
+               req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
+               req->r_con_filling_msg = NULL;
+       }
diff --git a/queue-3.4/0056-libceph-fix-overflow-in-__decode_pool_names.patch b/queue-3.4/0056-libceph-fix-overflow-in-__decode_pool_names.patch
new file mode 100644 (file)
index 0000000..e6eeecd
--- /dev/null
@@ -0,0 +1,51 @@
+From d87d591772b2956b9ac9e25eb499366100d2c4a8 Mon Sep 17 00:00:00 2001
+From: Xi Wang <xi.wang@gmail.com>
+Date: Wed, 6 Jun 2012 19:35:55 -0500
+Subject: libceph: fix overflow in __decode_pool_names()
+
+From: Xi Wang <xi.wang@gmail.com>
+
+(cherry picked from commit ad3b904c07dfa88603689bf9a67bffbb9b99beb5)
+
+`len' is read from the network and thus needs validation.  Otherwise a
+large `len' would cause an out-of-bounds access via the memcpy() call.
+In addition, len = 0xffffffff would overflow the kmalloc() size,
+leading to an out-of-bounds write.
+
+This patch adds a check of `len' via ceph_decode_need().  Also use
+kstrndup rather than kmalloc/memcpy.
+
+[elder@inktank.com: added -ENOMEM return for null kstrndup() result]
+
+Signed-off-by: Xi Wang <xi.wang@gmail.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/osdmap.c |   13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/net/ceph/osdmap.c
++++ b/net/ceph/osdmap.c
+@@ -495,15 +495,16 @@ static int __decode_pool_names(void **p,
+               ceph_decode_32_safe(p, end, pool, bad);
+               ceph_decode_32_safe(p, end, len, bad);
+               dout("  pool %d len %d\n", pool, len);
++              ceph_decode_need(p, end, len, bad);
+               pi = __lookup_pg_pool(&map->pg_pools, pool);
+               if (pi) {
++                      char *name = kstrndup(*p, len, GFP_NOFS);
++
++                      if (!name)
++                              return -ENOMEM;
+                       kfree(pi->name);
+-                      pi->name = kmalloc(len + 1, GFP_NOFS);
+-                      if (pi->name) {
+-                              memcpy(pi->name, *p, len);
+-                              pi->name[len] = '\0';
+-                              dout("  name is %s\n", pi->name);
+-                      }
++                      pi->name = name;
++                      dout("  name is %s\n", pi->name);
+               }
+               *p += len;
+       }
diff --git a/queue-3.4/0057-libceph-fix-overflow-in-osdmap_decode.patch b/queue-3.4/0057-libceph-fix-overflow-in-osdmap_decode.patch
new file mode 100644 (file)
index 0000000..0810782
--- /dev/null
@@ -0,0 +1,32 @@
+From cc725c099f905095dfa2fe50c46575096ff0052d Mon Sep 17 00:00:00 2001
+From: Xi Wang <xi.wang@gmail.com>
+Date: Wed, 6 Jun 2012 19:35:55 -0500
+Subject: libceph: fix overflow in osdmap_decode()
+
+From: Xi Wang <xi.wang@gmail.com>
+
+(cherry picked from commit e91a9b639a691e0982088b5954eaafb5a25c8f1c)
+
+On 32-bit systems, a large `n' would overflow `n * sizeof(u32)' and bypass
+the check ceph_decode_need(p, end, n * sizeof(u32), bad).  It would also
+overflow the subsequent kmalloc() size, leading to an out-of-bounds write.
+
+Signed-off-by: Xi Wang <xi.wang@gmail.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/osdmap.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ceph/osdmap.c
++++ b/net/ceph/osdmap.c
+@@ -674,6 +674,9 @@ struct ceph_osdmap *osdmap_decode(void *
+               ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
+               ceph_decode_copy(p, &pgid, sizeof(pgid));
+               n = ceph_decode_32(p);
++              err = -EINVAL;
++              if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
++                      goto bad;
+               ceph_decode_need(p, end, n * sizeof(u32), bad);
+               err = -ENOMEM;
+               pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
diff --git a/queue-3.4/0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch b/queue-3.4/0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch
new file mode 100644 (file)
index 0000000..22353a2
--- /dev/null
@@ -0,0 +1,34 @@
+From 1edb6147d0adbb0d51bed7c7432f59bbc41f8c82 Mon Sep 17 00:00:00 2001
+From: Xi Wang <xi.wang@gmail.com>
+Date: Wed, 6 Jun 2012 19:35:55 -0500
+Subject: libceph: fix overflow in osdmap_apply_incremental()
+
+From: Xi Wang <xi.wang@gmail.com>
+
+(cherry picked from commit a5506049500b30dbc5edb4d07a3577477c1f3643)
+
+On 32-bit systems, a large `pglen' would overflow `pglen*sizeof(u32)'
+and bypass the check ceph_decode_need(p, end, pglen*sizeof(u32), bad).
+It would also overflow the subsequent kmalloc() size, leading to an
+out-of-bounds write.
+
+Signed-off-by: Xi Wang <xi.wang@gmail.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/osdmap.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ceph/osdmap.c
++++ b/net/ceph/osdmap.c
+@@ -900,6 +900,10 @@ struct ceph_osdmap *osdmap_apply_increme
+                       (void) __remove_pg_mapping(&map->pg_temp, pgid);
+                       /* insert */
++                      if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) {
++                              err = -EINVAL;
++                              goto bad;
++                      }
+                       pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
+                       if (!pg) {
+                               err = -ENOMEM;
diff --git a/queue-3.4/0059-libceph-transition-socket-state-prior-to-actual-conn.patch b/queue-3.4/0059-libceph-transition-socket-state-prior-to-actual-conn.patch
new file mode 100644 (file)
index 0000000..cd5d0f8
--- /dev/null
@@ -0,0 +1,43 @@
+From aa868bb25ad02c63c69783a9ffa1f6f1d3e98a5d Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Sat, 9 Jun 2012 14:19:21 -0700
+Subject: libceph: transition socket state prior to actual connect
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 89a86be0ce20022f6ede8bccec078dbb3d63caaa)
+
+Once we call ->connect(), we are racing against the actual
+connection, and a subsequent transition from CONNECTING ->
+CONNECTED.  Set the state to CONNECTING before that, under the
+protection of the mutex, to avoid the race.
+
+The race was introduced in commit 928443cd9644e7cfd46f687dbeffda2d1a357ff9,
+along with the original socket state code.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -321,6 +321,7 @@ static int ceph_tcp_connect(struct ceph_
+       dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
++      con_sock_state_connecting(con);
+       ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+                                O_NONBLOCK);
+       if (ret == -EINPROGRESS) {
+@@ -336,8 +337,6 @@ static int ceph_tcp_connect(struct ceph_
+               return ret;
+       }
+       con->sock = sock;
+-      con_sock_state_connecting(con);
+-
+       return 0;
+ }
index a01044356213b32ecff1258515e1a049ef64e4d6..a2f21cda4a2c6625c5f9dbcc479ad3a4804be319 100644 (file)
@@ -92,3 +92,23 @@ selinux-fix-sel_netnode_insert-suspicious-rcu-dereference.patch
 0037-ceph-check-PG_Private-flag-before-accessing-page-pri.patch
 0038-libceph-eliminate-connection-state-DEAD.patch
 0039-libceph-kill-bad_proto-ceph-connection-op.patch
+0040-libceph-rename-socket-callbacks.patch
+0041-libceph-rename-kvec_reset-and-kvec_add-functions.patch
+0042-libceph-embed-ceph-messenger-structure-in-ceph_clien.patch
+0043-libceph-start-separating-connection-flags-from-state.patch
+0044-libceph-start-tracking-connection-socket-state.patch
+0045-libceph-provide-osd-number-when-creating-osd.patch
+0046-libceph-set-CLOSED-state-bit-in-con_init.patch
+0047-libceph-embed-ceph-connection-structure-in-mon_clien.patch
+0048-libceph-drop-connection-refcounting-for-mon_client.patch
+0049-libceph-init-monitor-connection-when-opening.patch
+0050-libceph-fully-initialize-connection-in-con_init.patch
+0051-libceph-tweak-ceph_alloc_msg.patch
+0052-libceph-have-messages-point-to-their-connection.patch
+0053-libceph-have-messages-take-a-connection-reference.patch
+0054-libceph-make-ceph_con_revoke-a-msg-operation.patch
+0055-libceph-make-ceph_con_revoke_message-a-msg-op.patch
+0056-libceph-fix-overflow-in-__decode_pool_names.patch
+0057-libceph-fix-overflow-in-osdmap_decode.patch
+0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch
+0059-libceph-transition-socket-state-prior-to-actual-conn.patch