From dd878a8933732fed99bb72b2ce08350b8efc87e7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 Nov 2012 12:04:45 -0800 Subject: [PATCH] 3.4-stable patches added patches: 0040-libceph-rename-socket-callbacks.patch 0041-libceph-rename-kvec_reset-and-kvec_add-functions.patch 0042-libceph-embed-ceph-messenger-structure-in-ceph_clien.patch 0043-libceph-start-separating-connection-flags-from-state.patch 0044-libceph-start-tracking-connection-socket-state.patch 0045-libceph-provide-osd-number-when-creating-osd.patch 0046-libceph-set-CLOSED-state-bit-in-con_init.patch 0047-libceph-embed-ceph-connection-structure-in-mon_clien.patch 0048-libceph-drop-connection-refcounting-for-mon_client.patch 0049-libceph-init-monitor-connection-when-opening.patch 0050-libceph-fully-initialize-connection-in-con_init.patch 0051-libceph-tweak-ceph_alloc_msg.patch 0052-libceph-have-messages-point-to-their-connection.patch 0053-libceph-have-messages-take-a-connection-reference.patch 0054-libceph-make-ceph_con_revoke-a-msg-operation.patch 0055-libceph-make-ceph_con_revoke_message-a-msg-op.patch 0056-libceph-fix-overflow-in-__decode_pool_names.patch 0057-libceph-fix-overflow-in-osdmap_decode.patch 0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch 0059-libceph-transition-socket-state-prior-to-actual-conn.patch --- ...0040-libceph-rename-socket-callbacks.patch | 112 ++++++++ ...me-kvec_reset-and-kvec_add-functions.patch | 183 +++++++++++++ ...ph-messenger-structure-in-ceph_clien.patch | 208 ++++++++++++++ ...parating-connection-flags-from-state.patch | 257 ++++++++++++++++++ ...art-tracking-connection-socket-state.patch | 199 ++++++++++++++ ...provide-osd-number-when-creating-osd.patch | 63 +++++ ...eph-set-CLOSED-state-bit-in-con_init.patch | 83 ++++++ ...ph-connection-structure-in-mon_clien.patch | 178 ++++++++++++ ...onnection-refcounting-for-mon_client.patch | 48 ++++ ...init-monitor-connection-when-opening.patch | 64 +++++ ...ly-initialize-connection-in-con_init.patch 
| 116 ++++++++ .../0051-libceph-tweak-ceph_alloc_msg.patch | 171 ++++++++++++ ...e-messages-point-to-their-connection.patch | 154 +++++++++++ ...messages-take-a-connection-reference.patch | 174 ++++++++++++ ...make-ceph_con_revoke-a-msg-operation.patch | 121 +++++++++ ...ake-ceph_con_revoke_message-a-msg-op.patch | 120 ++++++++ ...-fix-overflow-in-__decode_pool_names.patch | 51 ++++ ...ibceph-fix-overflow-in-osdmap_decode.patch | 32 +++ ...overflow-in-osdmap_apply_incremental.patch | 34 +++ ...on-socket-state-prior-to-actual-conn.patch | 43 +++ queue-3.4/series | 20 ++ 21 files changed, 2431 insertions(+) create mode 100644 queue-3.4/0040-libceph-rename-socket-callbacks.patch create mode 100644 queue-3.4/0041-libceph-rename-kvec_reset-and-kvec_add-functions.patch create mode 100644 queue-3.4/0042-libceph-embed-ceph-messenger-structure-in-ceph_clien.patch create mode 100644 queue-3.4/0043-libceph-start-separating-connection-flags-from-state.patch create mode 100644 queue-3.4/0044-libceph-start-tracking-connection-socket-state.patch create mode 100644 queue-3.4/0045-libceph-provide-osd-number-when-creating-osd.patch create mode 100644 queue-3.4/0046-libceph-set-CLOSED-state-bit-in-con_init.patch create mode 100644 queue-3.4/0047-libceph-embed-ceph-connection-structure-in-mon_clien.patch create mode 100644 queue-3.4/0048-libceph-drop-connection-refcounting-for-mon_client.patch create mode 100644 queue-3.4/0049-libceph-init-monitor-connection-when-opening.patch create mode 100644 queue-3.4/0050-libceph-fully-initialize-connection-in-con_init.patch create mode 100644 queue-3.4/0051-libceph-tweak-ceph_alloc_msg.patch create mode 100644 queue-3.4/0052-libceph-have-messages-point-to-their-connection.patch create mode 100644 queue-3.4/0053-libceph-have-messages-take-a-connection-reference.patch create mode 100644 queue-3.4/0054-libceph-make-ceph_con_revoke-a-msg-operation.patch create mode 100644 queue-3.4/0055-libceph-make-ceph_con_revoke_message-a-msg-op.patch create mode 
100644 queue-3.4/0056-libceph-fix-overflow-in-__decode_pool_names.patch create mode 100644 queue-3.4/0057-libceph-fix-overflow-in-osdmap_decode.patch create mode 100644 queue-3.4/0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch create mode 100644 queue-3.4/0059-libceph-transition-socket-state-prior-to-actual-conn.patch diff --git a/queue-3.4/0040-libceph-rename-socket-callbacks.patch b/queue-3.4/0040-libceph-rename-socket-callbacks.patch new file mode 100644 index 00000000000..56b904e6586 --- /dev/null +++ b/queue-3.4/0040-libceph-rename-socket-callbacks.patch @@ -0,0 +1,112 @@ +From 5566f701581eecb9bb825d4db233256106ae9bd6 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Tue, 22 May 2012 11:41:43 -0500 +Subject: libceph: rename socket callbacks + +From: Alex Elder + +(cherry picked from commit 327800bdc2cb9b71f4b458ca07aa9d522668dde0) + +Change the names of the three socket callback functions to make it +more obvious they're specifically associated with a connection's +socket (not the ceph connection that uses it). 
+ +Signed-off-by: Alex Elder +Reviewed-by: Yehuda Sadeh +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/messenger.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -153,46 +153,46 @@ EXPORT_SYMBOL(ceph_msgr_flush); + */ + + /* data available on socket, or listen socket received a connect */ +-static void ceph_data_ready(struct sock *sk, int count_unused) ++static void ceph_sock_data_ready(struct sock *sk, int count_unused) + { + struct ceph_connection *con = sk->sk_user_data; + + if (sk->sk_state != TCP_CLOSE_WAIT) { +- dout("ceph_data_ready on %p state = %lu, queueing work\n", ++ dout("%s on %p state = %lu, queueing work\n", __func__, + con, con->state); + queue_con(con); + } + } + + /* socket has buffer space for writing */ +-static void ceph_write_space(struct sock *sk) ++static void ceph_sock_write_space(struct sock *sk) + { + struct ceph_connection *con = sk->sk_user_data; + + /* only queue to workqueue if there is data we want to write, + * and there is sufficient space in the socket buffer to accept +- * more data. clear SOCK_NOSPACE so that ceph_write_space() ++ * more data. clear SOCK_NOSPACE so that ceph_sock_write_space() + * doesn't get called again until try_write() fills the socket + * buffer. See net/ipv4/tcp_input.c:tcp_check_space() + * and net/core/stream.c:sk_stream_write_space(). 
+ */ + if (test_bit(WRITE_PENDING, &con->state)) { + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { +- dout("ceph_write_space %p queueing write work\n", con); ++ dout("%s %p queueing write work\n", __func__, con); + clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + queue_con(con); + } + } else { +- dout("ceph_write_space %p nothing to write\n", con); ++ dout("%s %p nothing to write\n", __func__, con); + } + } + + /* socket's state has changed */ +-static void ceph_state_change(struct sock *sk) ++static void ceph_sock_state_change(struct sock *sk) + { + struct ceph_connection *con = sk->sk_user_data; + +- dout("ceph_state_change %p state = %lu sk_state = %u\n", ++ dout("%s %p state = %lu sk_state = %u\n", __func__, + con, con->state, sk->sk_state); + + if (test_bit(CLOSED, &con->state)) +@@ -200,9 +200,9 @@ static void ceph_state_change(struct soc + + switch (sk->sk_state) { + case TCP_CLOSE: +- dout("ceph_state_change TCP_CLOSE\n"); ++ dout("%s TCP_CLOSE\n", __func__); + case TCP_CLOSE_WAIT: +- dout("ceph_state_change TCP_CLOSE_WAIT\n"); ++ dout("%s TCP_CLOSE_WAIT\n", __func__); + if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) { + if (test_bit(CONNECTING, &con->state)) + con->error_msg = "connection failed"; +@@ -212,7 +212,7 @@ static void ceph_state_change(struct soc + } + break; + case TCP_ESTABLISHED: +- dout("ceph_state_change TCP_ESTABLISHED\n"); ++ dout("%s TCP_ESTABLISHED\n", __func__); + queue_con(con); + break; + default: /* Everything else is uninteresting */ +@@ -228,9 +228,9 @@ static void set_sock_callbacks(struct so + { + struct sock *sk = sock->sk; + sk->sk_user_data = con; +- sk->sk_data_ready = ceph_data_ready; +- sk->sk_write_space = ceph_write_space; +- sk->sk_state_change = ceph_state_change; ++ sk->sk_data_ready = ceph_sock_data_ready; ++ sk->sk_write_space = ceph_sock_write_space; ++ sk->sk_state_change = ceph_sock_state_change; + } + + diff --git a/queue-3.4/0041-libceph-rename-kvec_reset-and-kvec_add-functions.patch 
b/queue-3.4/0041-libceph-rename-kvec_reset-and-kvec_add-functions.patch new file mode 100644 index 00000000000..f7c362473af --- /dev/null +++ b/queue-3.4/0041-libceph-rename-kvec_reset-and-kvec_add-functions.patch @@ -0,0 +1,183 @@ +From 2a06b676ea95fd6a85292a8b93f205867f4cbfef Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Wed, 23 May 2012 14:35:23 -0500 +Subject: libceph: rename kvec_reset and kvec_add functions + +From: Alex Elder + +(cherry picked from commit e22004235a900213625acd6583ac913d5a30c155) + +The functions ceph_con_out_kvec_reset() and ceph_con_out_kvec_add() +are entirely private functions, so drop the "ceph_" prefix in their +name to make them slightly more wieldy. + +Signed-off-by: Alex Elder +Reviewed-by: Yehuda Sadeh +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/messenger.c | 48 ++++++++++++++++++++++++------------------------ + 1 file changed, 24 insertions(+), 24 deletions(-) + +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -486,14 +486,14 @@ static u32 get_global_seq(struct ceph_me + return ret; + } + +-static void ceph_con_out_kvec_reset(struct ceph_connection *con) ++static void con_out_kvec_reset(struct ceph_connection *con) + { + con->out_kvec_left = 0; + con->out_kvec_bytes = 0; + con->out_kvec_cur = &con->out_kvec[0]; + } + +-static void ceph_con_out_kvec_add(struct ceph_connection *con, ++static void con_out_kvec_add(struct ceph_connection *con, + size_t size, void *data) + { + int index; +@@ -534,7 +534,7 @@ static void prepare_write_message(struct + struct ceph_msg *m; + u32 crc; + +- ceph_con_out_kvec_reset(con); ++ con_out_kvec_reset(con); + con->out_kvec_is_msg = true; + con->out_msg_done = false; + +@@ -542,9 +542,9 @@ static void prepare_write_message(struct + * TCP packet that's a good thing. 
*/ + if (con->in_seq > con->in_seq_acked) { + con->in_seq_acked = con->in_seq; +- ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); ++ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); + con->out_temp_ack = cpu_to_le64(con->in_seq_acked); +- ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), ++ con_out_kvec_add(con, sizeof (con->out_temp_ack), + &con->out_temp_ack); + } + +@@ -576,12 +576,12 @@ static void prepare_write_message(struct + BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); + + /* tag + hdr + front + middle */ +- ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); +- ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); +- ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); ++ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); ++ con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); ++ con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); + + if (m->middle) +- ceph_con_out_kvec_add(con, m->middle->vec.iov_len, ++ con_out_kvec_add(con, m->middle->vec.iov_len, + m->middle->vec.iov_base); + + /* fill in crc (except data pages), footer */ +@@ -630,12 +630,12 @@ static void prepare_write_ack(struct cep + con->in_seq_acked, con->in_seq); + con->in_seq_acked = con->in_seq; + +- ceph_con_out_kvec_reset(con); ++ con_out_kvec_reset(con); + +- ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); ++ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); + + con->out_temp_ack = cpu_to_le64(con->in_seq_acked); +- ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), ++ con_out_kvec_add(con, sizeof (con->out_temp_ack), + &con->out_temp_ack); + + con->out_more = 1; /* more will follow.. eventually.. 
*/ +@@ -648,8 +648,8 @@ static void prepare_write_ack(struct cep + static void prepare_write_keepalive(struct ceph_connection *con) + { + dout("prepare_write_keepalive %p\n", con); +- ceph_con_out_kvec_reset(con); +- ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); ++ con_out_kvec_reset(con); ++ con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); + set_bit(WRITE_PENDING, &con->state); + } + +@@ -694,8 +694,8 @@ static struct ceph_auth_handshake *get_c + */ + static void prepare_write_banner(struct ceph_connection *con) + { +- ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); +- ceph_con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), ++ con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); ++ con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), + &con->msgr->my_enc_addr); + + con->out_more = 0; +@@ -742,10 +742,10 @@ static int prepare_write_connect(struct + con->out_connect.authorizer_len = auth ? + cpu_to_le32(auth->authorizer_buf_len) : 0; + +- ceph_con_out_kvec_add(con, sizeof (con->out_connect), ++ con_out_kvec_add(con, sizeof (con->out_connect), + &con->out_connect); + if (auth && auth->authorizer_buf_len) +- ceph_con_out_kvec_add(con, auth->authorizer_buf_len, ++ con_out_kvec_add(con, auth->authorizer_buf_len, + auth->authorizer_buf); + + con->out_more = 0; +@@ -939,7 +939,7 @@ static int write_partial_msg_pages(struc + /* prepare and queue up footer, too */ + if (!do_datacrc) + con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; +- ceph_con_out_kvec_reset(con); ++ con_out_kvec_reset(con); + prepare_write_message_footer(con); + ret = 1; + out: +@@ -1402,7 +1402,7 @@ static int process_connect(struct ceph_c + return -1; + } + con->auth_retry = 1; +- ceph_con_out_kvec_reset(con); ++ con_out_kvec_reset(con); + ret = prepare_write_connect(con); + if (ret < 0) + return ret; +@@ -1423,7 +1423,7 @@ static int process_connect(struct ceph_c + ENTITY_NAME(con->peer_name), + ceph_pr_addr(&con->peer_addr.in_addr)); + 
reset_connection(con); +- ceph_con_out_kvec_reset(con); ++ con_out_kvec_reset(con); + ret = prepare_write_connect(con); + if (ret < 0) + return ret; +@@ -1449,7 +1449,7 @@ static int process_connect(struct ceph_c + le32_to_cpu(con->out_connect.connect_seq), + le32_to_cpu(con->in_reply.connect_seq)); + con->connect_seq = le32_to_cpu(con->in_reply.connect_seq); +- ceph_con_out_kvec_reset(con); ++ con_out_kvec_reset(con); + ret = prepare_write_connect(con); + if (ret < 0) + return ret; +@@ -1466,7 +1466,7 @@ static int process_connect(struct ceph_c + le32_to_cpu(con->in_reply.global_seq)); + get_global_seq(con->msgr, + le32_to_cpu(con->in_reply.global_seq)); +- ceph_con_out_kvec_reset(con); ++ con_out_kvec_reset(con); + ret = prepare_write_connect(con); + if (ret < 0) + return ret; +@@ -1873,7 +1873,7 @@ more: + + /* open the socket first? */ + if (con->sock == NULL) { +- ceph_con_out_kvec_reset(con); ++ con_out_kvec_reset(con); + prepare_write_banner(con); + ret = prepare_write_connect(con); + if (ret < 0) diff --git a/queue-3.4/0042-libceph-embed-ceph-messenger-structure-in-ceph_clien.patch b/queue-3.4/0042-libceph-embed-ceph-messenger-structure-in-ceph_clien.patch new file mode 100644 index 00000000000..0317e97416f --- /dev/null +++ b/queue-3.4/0042-libceph-embed-ceph-messenger-structure-in-ceph_clien.patch @@ -0,0 +1,208 @@ +From ab20b55d471452332ce9e1b76ea7a522999e2055 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Sat, 26 May 2012 23:26:43 -0500 +Subject: libceph: embed ceph messenger structure in ceph_client + +From: Alex Elder + +(cherry picked from commit 15d9882c336db2db73ccf9871ae2398e452f694c) + +A ceph client has a pointer to a ceph messenger structure in it. +There is always exactly one ceph messenger for a ceph client, so +there is no need to allocate it separate from the ceph client +structure. + +Switch the ceph_client structure to embed its ceph_messenger +structure. 
+ +Signed-off-by: Alex Elder +Reviewed-by: Yehuda Sadeh +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/mds_client.c | 2 +- + include/linux/ceph/libceph.h | 2 +- + include/linux/ceph/messenger.h | 9 +++++---- + net/ceph/ceph_common.c | 18 +++++------------- + net/ceph/messenger.c | 30 +++++++++--------------------- + net/ceph/mon_client.c | 6 +++--- + net/ceph/osd_client.c | 4 ++-- + 7 files changed, 26 insertions(+), 45 deletions(-) + +--- a/fs/ceph/mds_client.c ++++ b/fs/ceph/mds_client.c +@@ -394,7 +394,7 @@ static struct ceph_mds_session *register + s->s_seq = 0; + mutex_init(&s->s_mutex); + +- ceph_con_init(mdsc->fsc->client->msgr, &s->s_con); ++ ceph_con_init(&mdsc->fsc->client->msgr, &s->s_con); + s->s_con.private = s; + s->s_con.ops = &mds_con_ops; + s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; +--- a/include/linux/ceph/libceph.h ++++ b/include/linux/ceph/libceph.h +@@ -132,7 +132,7 @@ struct ceph_client { + u32 supported_features; + u32 required_features; + +- struct ceph_messenger *msgr; /* messenger instance */ ++ struct ceph_messenger msgr; /* messenger instance */ + struct ceph_mon_client monc; + struct ceph_osd_client osdc; + +--- a/include/linux/ceph/messenger.h ++++ b/include/linux/ceph/messenger.h +@@ -203,10 +203,11 @@ extern int ceph_msgr_init(void); + extern void ceph_msgr_exit(void); + extern void ceph_msgr_flush(void); + +-extern struct ceph_messenger *ceph_messenger_create( +- struct ceph_entity_addr *myaddr, +- u32 features, u32 required); +-extern void ceph_messenger_destroy(struct ceph_messenger *); ++extern void ceph_messenger_init(struct ceph_messenger *msgr, ++ struct ceph_entity_addr *myaddr, ++ u32 supported_features, ++ u32 required_features, ++ bool nocrc); + + extern void ceph_con_init(struct ceph_messenger *msgr, + struct ceph_connection *con); +--- a/net/ceph/ceph_common.c ++++ b/net/ceph/ceph_common.c +@@ -468,19 +468,15 @@ struct ceph_client *ceph_create_client(s + /* msgr */ + if 
(ceph_test_opt(client, MYIP)) + myaddr = &client->options->my_addr; +- client->msgr = ceph_messenger_create(myaddr, +- client->supported_features, +- client->required_features); +- if (IS_ERR(client->msgr)) { +- err = PTR_ERR(client->msgr); +- goto fail; +- } +- client->msgr->nocrc = ceph_test_opt(client, NOCRC); ++ ceph_messenger_init(&client->msgr, myaddr, ++ client->supported_features, ++ client->required_features, ++ ceph_test_opt(client, NOCRC)); + + /* subsystems */ + err = ceph_monc_init(&client->monc, client); + if (err < 0) +- goto fail_msgr; ++ goto fail; + err = ceph_osdc_init(&client->osdc, client); + if (err < 0) + goto fail_monc; +@@ -489,8 +485,6 @@ struct ceph_client *ceph_create_client(s + + fail_monc: + ceph_monc_stop(&client->monc); +-fail_msgr: +- ceph_messenger_destroy(client->msgr); + fail: + kfree(client); + return ERR_PTR(err); +@@ -508,8 +502,6 @@ void ceph_destroy_client(struct ceph_cli + + ceph_debugfs_client_cleanup(client); + +- ceph_messenger_destroy(client->msgr); +- + ceph_destroy_options(client->options); + + kfree(client); +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -2249,18 +2249,14 @@ out: + + + /* +- * create a new messenger instance ++ * initialize a new messenger instance + */ +-struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, +- u32 supported_features, +- u32 required_features) ++void ceph_messenger_init(struct ceph_messenger *msgr, ++ struct ceph_entity_addr *myaddr, ++ u32 supported_features, ++ u32 required_features, ++ bool nocrc) + { +- struct ceph_messenger *msgr; +- +- msgr = kzalloc(sizeof(*msgr), GFP_KERNEL); +- if (msgr == NULL) +- return ERR_PTR(-ENOMEM); +- + msgr->supported_features = supported_features; + msgr->required_features = required_features; + +@@ -2273,19 +2269,11 @@ struct ceph_messenger *ceph_messenger_cr + msgr->inst.addr.type = 0; + get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); + encode_my_addr(msgr); ++ msgr->nocrc = nocrc; + 
+- dout("messenger_create %p\n", msgr); +- return msgr; +-} +-EXPORT_SYMBOL(ceph_messenger_create); +- +-void ceph_messenger_destroy(struct ceph_messenger *msgr) +-{ +- dout("destroy %p\n", msgr); +- kfree(msgr); +- dout("destroyed messenger %p\n", msgr); ++ dout("%s %p\n", __func__, msgr); + } +-EXPORT_SYMBOL(ceph_messenger_destroy); ++EXPORT_SYMBOL(ceph_messenger_init); + + static void clear_standby(struct ceph_connection *con) + { +--- a/net/ceph/mon_client.c ++++ b/net/ceph/mon_client.c +@@ -763,7 +763,7 @@ int ceph_monc_init(struct ceph_mon_clien + monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL); + if (!monc->con) + goto out_monmap; +- ceph_con_init(monc->client->msgr, monc->con); ++ ceph_con_init(&monc->client->msgr, monc->con); + monc->con->private = monc; + monc->con->ops = &mon_con_ops; + +@@ -888,8 +888,8 @@ static void handle_auth_reply(struct cep + } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { + dout("authenticated, starting session\n"); + +- monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; +- monc->client->msgr->inst.name.num = ++ monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT; ++ monc->client->msgr.inst.name.num = + cpu_to_le64(monc->auth->global_id); + + __send_subscribe(monc); +--- a/net/ceph/osd_client.c ++++ b/net/ceph/osd_client.c +@@ -639,7 +639,7 @@ static struct ceph_osd *create_osd(struc + INIT_LIST_HEAD(&osd->o_osd_lru); + osd->o_incarnation = 1; + +- ceph_con_init(osdc->client->msgr, &osd->o_con); ++ ceph_con_init(&osdc->client->msgr, &osd->o_con); + osd->o_con.private = osd; + osd->o_con.ops = &osd_con_ops; + osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD; +@@ -1391,7 +1391,7 @@ void ceph_osdc_handle_map(struct ceph_os + epoch, maplen); + newmap = osdmap_apply_incremental(&p, next, + osdc->osdmap, +- osdc->client->msgr); ++ &osdc->client->msgr); + if (IS_ERR(newmap)) { + err = PTR_ERR(newmap); + goto bad; diff --git 
a/queue-3.4/0043-libceph-start-separating-connection-flags-from-state.patch b/queue-3.4/0043-libceph-start-separating-connection-flags-from-state.patch new file mode 100644 index 00000000000..d32529e9804 --- /dev/null +++ b/queue-3.4/0043-libceph-start-separating-connection-flags-from-state.patch @@ -0,0 +1,257 @@ +From f47510a7851c8440845a0c1a7184323cc10ec15d Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Tue, 22 May 2012 11:41:43 -0500 +Subject: libceph: start separating connection flags from state + +From: Alex Elder + +(cherry picked from commit 928443cd9644e7cfd46f687dbeffda2d1a357ff9) + +A ceph_connection holds a mixture of connection state (as in "state +machine" state) and connection flags in a single "state" field. To +make the distinction more clear, define a new "flags" field and use +it rather than the "state" field to hold Boolean flag values. + +Signed-off-by: Alex Elder +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ceph/messenger.h | 18 +++++++++----- + net/ceph/messenger.c | 50 ++++++++++++++++++++--------------------- + 2 files changed, 37 insertions(+), 31 deletions(-) + +--- a/include/linux/ceph/messenger.h ++++ b/include/linux/ceph/messenger.h +@@ -103,20 +103,25 @@ struct ceph_msg_pos { + #define MAX_DELAY_INTERVAL (5 * 60 * HZ) + + /* +- * ceph_connection state bit flags ++ * ceph_connection flag bits + */ ++ + #define LOSSYTX 0 /* we can close channel or drop messages on errors */ +-#define CONNECTING 1 +-#define NEGOTIATING 2 + #define KEEPALIVE_PENDING 3 + #define WRITE_PENDING 4 /* we have data ready to send */ ++#define SOCK_CLOSED 11 /* socket state changed to closed */ ++#define BACKOFF 15 ++ ++/* ++ * ceph_connection states ++ */ ++#define CONNECTING 1 ++#define NEGOTIATING 2 + #define STANDBY 8 /* no outgoing messages, socket closed. we keep + * the ceph_connection around to maintain shared + * state with the peer. 
*/ + #define CLOSED 10 /* we've closed the connection */ +-#define SOCK_CLOSED 11 /* socket state changed to closed */ + #define OPENING 13 /* open connection w/ (possibly new) peer */ +-#define BACKOFF 15 + + /* + * A single connection with another host. +@@ -133,7 +138,8 @@ struct ceph_connection { + + struct ceph_messenger *msgr; + struct socket *sock; +- unsigned long state; /* connection state (see flags above) */ ++ unsigned long flags; ++ unsigned long state; + const char *error_msg; /* error message, if any */ + + struct ceph_entity_addr peer_addr; /* peer address */ +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -176,7 +176,7 @@ static void ceph_sock_write_space(struct + * buffer. See net/ipv4/tcp_input.c:tcp_check_space() + * and net/core/stream.c:sk_stream_write_space(). + */ +- if (test_bit(WRITE_PENDING, &con->state)) { ++ if (test_bit(WRITE_PENDING, &con->flags)) { + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { + dout("%s %p queueing write work\n", __func__, con); + clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); +@@ -203,7 +203,7 @@ static void ceph_sock_state_change(struc + dout("%s TCP_CLOSE\n", __func__); + case TCP_CLOSE_WAIT: + dout("%s TCP_CLOSE_WAIT\n", __func__); +- if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) { ++ if (test_and_set_bit(SOCK_CLOSED, &con->flags) == 0) { + if (test_bit(CONNECTING, &con->state)) + con->error_msg = "connection failed"; + else +@@ -395,9 +395,9 @@ void ceph_con_close(struct ceph_connecti + ceph_pr_addr(&con->peer_addr.in_addr)); + set_bit(CLOSED, &con->state); /* in case there's queued work */ + clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ +- clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ +- clear_bit(KEEPALIVE_PENDING, &con->state); +- clear_bit(WRITE_PENDING, &con->state); ++ clear_bit(LOSSYTX, &con->flags); /* so we retry next connect */ ++ clear_bit(KEEPALIVE_PENDING, &con->flags); ++ clear_bit(WRITE_PENDING, &con->flags); + 
mutex_lock(&con->mutex); + reset_connection(con); + con->peer_global_seq = 0; +@@ -618,7 +618,7 @@ static void prepare_write_message(struct + prepare_write_message_footer(con); + } + +- set_bit(WRITE_PENDING, &con->state); ++ set_bit(WRITE_PENDING, &con->flags); + } + + /* +@@ -639,7 +639,7 @@ static void prepare_write_ack(struct cep + &con->out_temp_ack); + + con->out_more = 1; /* more will follow.. eventually.. */ +- set_bit(WRITE_PENDING, &con->state); ++ set_bit(WRITE_PENDING, &con->flags); + } + + /* +@@ -650,7 +650,7 @@ static void prepare_write_keepalive(stru + dout("prepare_write_keepalive %p\n", con); + con_out_kvec_reset(con); + con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); +- set_bit(WRITE_PENDING, &con->state); ++ set_bit(WRITE_PENDING, &con->flags); + } + + /* +@@ -679,7 +679,7 @@ static struct ceph_auth_handshake *get_c + + if (IS_ERR(auth)) + return auth; +- if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->state)) ++ if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->flags)) + return ERR_PTR(-EAGAIN); + + con->auth_reply_buf = auth->authorizer_reply_buf; +@@ -699,7 +699,7 @@ static void prepare_write_banner(struct + &con->msgr->my_enc_addr); + + con->out_more = 0; +- set_bit(WRITE_PENDING, &con->state); ++ set_bit(WRITE_PENDING, &con->flags); + } + + static int prepare_write_connect(struct ceph_connection *con) +@@ -749,7 +749,7 @@ static int prepare_write_connect(struct + auth->authorizer_buf); + + con->out_more = 0; +- set_bit(WRITE_PENDING, &con->state); ++ set_bit(WRITE_PENDING, &con->flags); + + return 0; + } +@@ -1496,7 +1496,7 @@ static int process_connect(struct ceph_c + le32_to_cpu(con->in_reply.connect_seq)); + + if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY) +- set_bit(LOSSYTX, &con->state); ++ set_bit(LOSSYTX, &con->flags); + + prepare_read_tag(con); + break; +@@ -1937,14 +1937,14 @@ do_next: + prepare_write_ack(con); + goto more; + } +- if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) { ++ 
if (test_and_clear_bit(KEEPALIVE_PENDING, &con->flags)) { + prepare_write_keepalive(con); + goto more; + } + } + + /* Nothing to do! */ +- clear_bit(WRITE_PENDING, &con->state); ++ clear_bit(WRITE_PENDING, &con->flags); + dout("try_write nothing else to write.\n"); + ret = 0; + out: +@@ -2110,7 +2110,7 @@ static void con_work(struct work_struct + + mutex_lock(&con->mutex); + restart: +- if (test_and_clear_bit(BACKOFF, &con->state)) { ++ if (test_and_clear_bit(BACKOFF, &con->flags)) { + dout("con_work %p backing off\n", con); + if (queue_delayed_work(ceph_msgr_wq, &con->work, + round_jiffies_relative(con->delay))) { +@@ -2139,7 +2139,7 @@ restart: + con_close_socket(con); + } + +- if (test_and_clear_bit(SOCK_CLOSED, &con->state)) ++ if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) + goto fault; + + ret = try_read(con); +@@ -2178,7 +2178,7 @@ static void ceph_fault(struct ceph_conne + dout("fault %p state %lu to peer %s\n", + con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); + +- if (test_bit(LOSSYTX, &con->state)) { ++ if (test_bit(LOSSYTX, &con->flags)) { + dout("fault on LOSSYTX channel\n"); + goto out; + } +@@ -2200,9 +2200,9 @@ static void ceph_fault(struct ceph_conne + /* If there are no messages queued or keepalive pending, place + * the connection in a STANDBY state */ + if (list_empty(&con->out_queue) && +- !test_bit(KEEPALIVE_PENDING, &con->state)) { ++ !test_bit(KEEPALIVE_PENDING, &con->flags)) { + dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); +- clear_bit(WRITE_PENDING, &con->state); ++ clear_bit(WRITE_PENDING, &con->flags); + set_bit(STANDBY, &con->state); + } else { + /* retry after a delay. */ +@@ -2226,7 +2226,7 @@ static void ceph_fault(struct ceph_conne + * that when con_work restarts we schedule the + * delay then. 
+ */ +- set_bit(BACKOFF, &con->state); ++ set_bit(BACKOFF, &con->flags); + } + } + +@@ -2282,8 +2282,8 @@ static void clear_standby(struct ceph_co + mutex_lock(&con->mutex); + dout("clear_standby %p and ++connect_seq\n", con); + con->connect_seq++; +- WARN_ON(test_bit(WRITE_PENDING, &con->state)); +- WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state)); ++ WARN_ON(test_bit(WRITE_PENDING, &con->flags)); ++ WARN_ON(test_bit(KEEPALIVE_PENDING, &con->flags)); + mutex_unlock(&con->mutex); + } + } +@@ -2321,7 +2321,7 @@ void ceph_con_send(struct ceph_connectio + /* if there wasn't anything waiting to send before, queue + * new work */ + clear_standby(con); +- if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) ++ if (test_and_set_bit(WRITE_PENDING, &con->flags) == 0) + queue_con(con); + } + EXPORT_SYMBOL(ceph_con_send); +@@ -2388,8 +2388,8 @@ void ceph_con_keepalive(struct ceph_conn + { + dout("con_keepalive %p\n", con); + clear_standby(con); +- if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && +- test_and_set_bit(WRITE_PENDING, &con->state) == 0) ++ if (test_and_set_bit(KEEPALIVE_PENDING, &con->flags) == 0 && ++ test_and_set_bit(WRITE_PENDING, &con->flags) == 0) + queue_con(con); + } + EXPORT_SYMBOL(ceph_con_keepalive); diff --git a/queue-3.4/0044-libceph-start-tracking-connection-socket-state.patch b/queue-3.4/0044-libceph-start-tracking-connection-socket-state.patch new file mode 100644 index 00000000000..ec5445264f0 --- /dev/null +++ b/queue-3.4/0044-libceph-start-tracking-connection-socket-state.patch @@ -0,0 +1,199 @@ +From 1dc8c9af19ebb0486a3d573579358c4a3f918bb0 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Tue, 22 May 2012 22:15:49 -0500 +Subject: libceph: start tracking connection socket state + +From: Alex Elder + +(cherry picked from commit ce2c8903e76e690846a00a0284e4bd9ee954d680) + +Start explicitly keeping track of the state of a ceph connection's +socket, separate from the state of the connection itself. 
Create +placeholder functions to encapsulate the state transitions. + + -------- + | NEW* | transient initial state + -------- + | con_sock_state_init() + v + ---------- + | CLOSED | initialized, but no socket (and no + ---------- TCP connection) + ^ \ + | \ con_sock_state_connecting() + | ---------------------- + | \ + + con_sock_state_closed() \ + |\ \ + | \ \ + | ----------- \ + | | CLOSING | socket event; \ + | ----------- await close \ + | ^ | + | | | + | + con_sock_state_closing() | + | / \ | + | / --------------- | + | / \ v + | / -------------- + | / -----------------| CONNECTING | socket created, TCP + | | / -------------- connect initiated + | | | con_sock_state_connected() + | | v + ------------- + | CONNECTED | TCP connection established + ------------- + +Make the socket state an atomic variable, reinforcing that it's a +distinct transition with no possible "intermediate/both" states. +This is almost certainly overkill at this point, though the +transitions into CONNECTED and CLOSING state do get called via +socket callback (the rest of the transitions occur with the +connection mutex held). We can back out the atomicity later. 
+ +Signed-off-by: Alex Elder +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ceph/messenger.h | 8 +++-- + net/ceph/messenger.c | 64 +++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 70 insertions(+), 2 deletions(-) + +--- a/include/linux/ceph/messenger.h ++++ b/include/linux/ceph/messenger.h +@@ -137,14 +137,18 @@ struct ceph_connection { + const struct ceph_connection_operations *ops; + + struct ceph_messenger *msgr; ++ ++ atomic_t sock_state; + struct socket *sock; ++ struct ceph_entity_addr peer_addr; /* peer address */ ++ struct ceph_entity_addr peer_addr_for_me; ++ + unsigned long flags; + unsigned long state; + const char *error_msg; /* error message, if any */ + +- struct ceph_entity_addr peer_addr; /* peer address */ + struct ceph_entity_name peer_name; /* peer name */ +- struct ceph_entity_addr peer_addr_for_me; ++ + unsigned peer_features; + u32 connect_seq; /* identify the most recent connection + attempt for this connection, client */ +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -29,6 +29,14 @@ + * the sender. 
+ */ + ++/* State values for ceph_connection->sock_state; NEW is assumed to be 0 */ ++ ++#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */ ++#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */ ++#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */ ++#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */ ++#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */ ++ + /* static tag bytes (protocol control messages) */ + static char tag_msg = CEPH_MSGR_TAG_MSG; + static char tag_ack = CEPH_MSGR_TAG_ACK; +@@ -147,6 +155,55 @@ void ceph_msgr_flush(void) + } + EXPORT_SYMBOL(ceph_msgr_flush); + ++/* Connection socket state transition functions */ ++ ++static void con_sock_state_init(struct ceph_connection *con) ++{ ++ int old_state; ++ ++ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); ++ if (WARN_ON(old_state != CON_SOCK_STATE_NEW)) ++ printk("%s: unexpected old state %d\n", __func__, old_state); ++} ++ ++static void con_sock_state_connecting(struct ceph_connection *con) ++{ ++ int old_state; ++ ++ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING); ++ if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED)) ++ printk("%s: unexpected old state %d\n", __func__, old_state); ++} ++ ++static void con_sock_state_connected(struct ceph_connection *con) ++{ ++ int old_state; ++ ++ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED); ++ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING)) ++ printk("%s: unexpected old state %d\n", __func__, old_state); ++} ++ ++static void con_sock_state_closing(struct ceph_connection *con) ++{ ++ int old_state; ++ ++ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING); ++ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING && ++ old_state != CON_SOCK_STATE_CONNECTED && ++ old_state != CON_SOCK_STATE_CLOSING)) ++ printk("%s: unexpected old state %d\n", __func__, old_state); ++} ++ ++static void con_sock_state_closed(struct ceph_connection *con) ++{ ++ int old_state; ++ ++ 
old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); ++ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED && ++ old_state != CON_SOCK_STATE_CLOSING)) ++ printk("%s: unexpected old state %d\n", __func__, old_state); ++} + + /* + * socket callback functions +@@ -203,6 +260,7 @@ static void ceph_sock_state_change(struc + dout("%s TCP_CLOSE\n", __func__); + case TCP_CLOSE_WAIT: + dout("%s TCP_CLOSE_WAIT\n", __func__); ++ con_sock_state_closing(con); + if (test_and_set_bit(SOCK_CLOSED, &con->flags) == 0) { + if (test_bit(CONNECTING, &con->state)) + con->error_msg = "connection failed"; +@@ -213,6 +271,7 @@ static void ceph_sock_state_change(struc + break; + case TCP_ESTABLISHED: + dout("%s TCP_ESTABLISHED\n", __func__); ++ con_sock_state_connected(con); + queue_con(con); + break; + default: /* Everything else is uninteresting */ +@@ -277,6 +336,7 @@ static int ceph_tcp_connect(struct ceph_ + return ret; + } + con->sock = sock; ++ con_sock_state_connecting(con); + + return 0; + } +@@ -343,6 +403,7 @@ static int con_close_socket(struct ceph_ + sock_release(con->sock); + con->sock = NULL; + clear_bit(SOCK_CLOSED, &con->state); ++ con_sock_state_closed(con); + return rc; + } + +@@ -462,6 +523,9 @@ void ceph_con_init(struct ceph_messenger + memset(con, 0, sizeof(*con)); + atomic_set(&con->nref, 1); + con->msgr = msgr; ++ ++ con_sock_state_init(con); ++ + mutex_init(&con->mutex); + INIT_LIST_HEAD(&con->out_queue); + INIT_LIST_HEAD(&con->out_sent); diff --git a/queue-3.4/0045-libceph-provide-osd-number-when-creating-osd.patch b/queue-3.4/0045-libceph-provide-osd-number-when-creating-osd.patch new file mode 100644 index 00000000000..04b23d64813 --- /dev/null +++ b/queue-3.4/0045-libceph-provide-osd-number-when-creating-osd.patch @@ -0,0 +1,63 @@ +From 3cfa1d37bc05179577c43f4ed6b2689556729813 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Sat, 26 May 2012 23:26:43 -0500 +Subject: libceph: provide osd number when creating osd + +From: Alex Elder + +(cherry picked 
from commit e10006f807ffc4d5b1d861305d18d9e8145891ca) + +Pass the osd number to the create_osd() routine, and move the +initialization of fields that depend on it therein. + +Signed-off-by: Alex Elder +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/osd_client.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/ceph/osd_client.c ++++ b/net/ceph/osd_client.c +@@ -624,7 +624,7 @@ static void osd_reset(struct ceph_connec + /* + * Track open sessions with osds. + */ +-static struct ceph_osd *create_osd(struct ceph_osd_client *osdc) ++static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) + { + struct ceph_osd *osd; + +@@ -634,6 +634,7 @@ static struct ceph_osd *create_osd(struc + + atomic_set(&osd->o_ref, 1); + osd->o_osdc = osdc; ++ osd->o_osd = onum; + INIT_LIST_HEAD(&osd->o_requests); + INIT_LIST_HEAD(&osd->o_linger_requests); + INIT_LIST_HEAD(&osd->o_osd_lru); +@@ -643,6 +644,7 @@ static struct ceph_osd *create_osd(struc + osd->o_con.private = osd; + osd->o_con.ops = &osd_con_ops; + osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD; ++ osd->o_con.peer_name.num = cpu_to_le64(onum); + + INIT_LIST_HEAD(&osd->o_keepalive_item); + return osd; +@@ -998,15 +1000,13 @@ static int __map_request(struct ceph_osd + req->r_osd = __lookup_osd(osdc, o); + if (!req->r_osd && o >= 0) { + err = -ENOMEM; +- req->r_osd = create_osd(osdc); ++ req->r_osd = create_osd(osdc, o); + if (!req->r_osd) { + list_move(&req->r_req_lru_item, &osdc->req_notarget); + goto out; + } + + dout("map_request osd %p is osd%d\n", req->r_osd, o); +- req->r_osd->o_osd = o; +- req->r_osd->o_con.peer_name.num = cpu_to_le64(o); + __insert_osd(osdc, req->r_osd); + + ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]); diff --git a/queue-3.4/0046-libceph-set-CLOSED-state-bit-in-con_init.patch b/queue-3.4/0046-libceph-set-CLOSED-state-bit-in-con_init.patch new file mode 100644 index 00000000000..8ec7651869c --- /dev/null +++ 
b/queue-3.4/0046-libceph-set-CLOSED-state-bit-in-con_init.patch @@ -0,0 +1,83 @@ +From 2a162524ff9c9635cf040179c1f587b08fe5efa7 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Tue, 29 May 2012 11:04:58 -0500 +Subject: libceph: set CLOSED state bit in con_init + +From: Alex Elder + +(cherry picked from commit a5988c490ef66cb04ea2f610681949b25c773b3c) + +Once a connection is fully initialized, it is really in a CLOSED +state, so make that explicit by setting the bit in its state field. + +It is possible for a connection in NEGOTIATING state to get a +failure, leading to ceph_fault() and ultimately ceph_con_close(). +Clear that bit if it is set in that case, to reflect that the +connection truly is closed and is no longer participating in a +connect sequence. + +Issue a warning if ceph_con_open() is called on a connection that +is not in CLOSED state. + +Signed-off-by: Alex Elder +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/messenger.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -454,11 +454,14 @@ void ceph_con_close(struct ceph_connecti + { + dout("con_close %p peer %s\n", con, + ceph_pr_addr(&con->peer_addr.in_addr)); +- set_bit(CLOSED, &con->state); /* in case there's queued work */ ++ clear_bit(NEGOTIATING, &con->state); + clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ ++ set_bit(CLOSED, &con->state); ++ + clear_bit(LOSSYTX, &con->flags); /* so we retry next connect */ + clear_bit(KEEPALIVE_PENDING, &con->flags); + clear_bit(WRITE_PENDING, &con->flags); ++ + mutex_lock(&con->mutex); + reset_connection(con); + con->peer_global_seq = 0; +@@ -475,7 +478,8 @@ void ceph_con_open(struct ceph_connectio + { + dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); + set_bit(OPENING, &con->state); +- clear_bit(CLOSED, &con->state); ++ WARN_ON(!test_and_clear_bit(CLOSED, &con->state)); ++ + memcpy(&con->peer_addr, addr, 
sizeof(*addr)); + con->delay = 0; /* reset backoff memory */ + queue_con(con); +@@ -530,6 +534,8 @@ void ceph_con_init(struct ceph_messenger + INIT_LIST_HEAD(&con->out_queue); + INIT_LIST_HEAD(&con->out_sent); + INIT_DELAYED_WORK(&con->work, con_work); ++ ++ set_bit(CLOSED, &con->state); + } + EXPORT_SYMBOL(ceph_con_init); + +@@ -1937,14 +1943,15 @@ more: + + /* open the socket first? */ + if (con->sock == NULL) { ++ clear_bit(NEGOTIATING, &con->state); ++ set_bit(CONNECTING, &con->state); ++ + con_out_kvec_reset(con); + prepare_write_banner(con); + ret = prepare_write_connect(con); + if (ret < 0) + goto out; + prepare_read_banner(con); +- set_bit(CONNECTING, &con->state); +- clear_bit(NEGOTIATING, &con->state); + + BUG_ON(con->in_msg); + con->in_tag = CEPH_MSGR_TAG_READY; diff --git a/queue-3.4/0047-libceph-embed-ceph-connection-structure-in-mon_clien.patch b/queue-3.4/0047-libceph-embed-ceph-connection-structure-in-mon_clien.patch new file mode 100644 index 00000000000..16293509828 --- /dev/null +++ b/queue-3.4/0047-libceph-embed-ceph-connection-structure-in-mon_clien.patch @@ -0,0 +1,178 @@ +From ac495165ac2a91db87b344c4e769e3eccac72f89 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Sat, 26 May 2012 23:26:43 -0500 +Subject: libceph: embed ceph connection structure in mon_client + +From: Alex Elder + +(cherry picked from commit 67130934fb579fdf0f2f6d745960264378b57dc8) + +A monitor client has a pointer to a ceph connection structure in it. +This is the only one of the three ceph client types that do it this +way; the OSD and MDS clients embed the connection into their main +structures. There is always exactly one ceph connection for a +monitor client, so there is no need to allocate it separate from the +monitor client structure. + +So switch the ceph_mon_client structure to embed its +ceph_connection structure. 
+ +Signed-off-by: Alex Elder +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ceph/mon_client.h | 2 - + net/ceph/mon_client.c | 47 +++++++++++++++++----------------------- + 2 files changed, 21 insertions(+), 28 deletions(-) + +--- a/include/linux/ceph/mon_client.h ++++ b/include/linux/ceph/mon_client.h +@@ -70,7 +70,7 @@ struct ceph_mon_client { + bool hunting; + int cur_mon; /* last monitor i contacted */ + unsigned long sub_sent, sub_renew_after; +- struct ceph_connection *con; ++ struct ceph_connection con; + bool have_fsid; + + /* pending generic requests */ +--- a/net/ceph/mon_client.c ++++ b/net/ceph/mon_client.c +@@ -106,9 +106,9 @@ static void __send_prepared_auth_request + monc->pending_auth = 1; + monc->m_auth->front.iov_len = len; + monc->m_auth->hdr.front_len = cpu_to_le32(len); +- ceph_con_revoke(monc->con, monc->m_auth); ++ ceph_con_revoke(&monc->con, monc->m_auth); + ceph_msg_get(monc->m_auth); /* keep our ref */ +- ceph_con_send(monc->con, monc->m_auth); ++ ceph_con_send(&monc->con, monc->m_auth); + } + + /* +@@ -117,8 +117,8 @@ static void __send_prepared_auth_request + static void __close_session(struct ceph_mon_client *monc) + { + dout("__close_session closing mon%d\n", monc->cur_mon); +- ceph_con_revoke(monc->con, monc->m_auth); +- ceph_con_close(monc->con); ++ ceph_con_revoke(&monc->con, monc->m_auth); ++ ceph_con_close(&monc->con); + monc->cur_mon = -1; + monc->pending_auth = 0; + ceph_auth_reset(monc->auth); +@@ -142,9 +142,9 @@ static int __open_session(struct ceph_mo + monc->want_next_osdmap = !!monc->want_next_osdmap; + + dout("open_session mon%d opening\n", monc->cur_mon); +- monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON; +- monc->con->peer_name.num = cpu_to_le64(monc->cur_mon); +- ceph_con_open(monc->con, ++ monc->con.peer_name.type = CEPH_ENTITY_TYPE_MON; ++ monc->con.peer_name.num = cpu_to_le64(monc->cur_mon); ++ ceph_con_open(&monc->con, + &monc->monmap->mon_inst[monc->cur_mon].addr); + + /* 
initiatiate authentication handshake */ +@@ -226,8 +226,8 @@ static void __send_subscribe(struct ceph + + msg->front.iov_len = p - msg->front.iov_base; + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); +- ceph_con_revoke(monc->con, msg); +- ceph_con_send(monc->con, ceph_msg_get(msg)); ++ ceph_con_revoke(&monc->con, msg); ++ ceph_con_send(&monc->con, ceph_msg_get(msg)); + + monc->sub_sent = jiffies | 1; /* never 0 */ + } +@@ -247,7 +247,7 @@ static void handle_subscribe_ack(struct + if (monc->hunting) { + pr_info("mon%d %s session established\n", + monc->cur_mon, +- ceph_pr_addr(&monc->con->peer_addr.in_addr)); ++ ceph_pr_addr(&monc->con.peer_addr.in_addr)); + monc->hunting = false; + } + dout("handle_subscribe_ack after %d seconds\n", seconds); +@@ -461,7 +461,7 @@ static int do_generic_request(struct cep + req->request->hdr.tid = cpu_to_le64(req->tid); + __insert_generic_request(monc, req); + monc->num_generic_requests++; +- ceph_con_send(monc->con, ceph_msg_get(req->request)); ++ ceph_con_send(&monc->con, ceph_msg_get(req->request)); + mutex_unlock(&monc->mutex); + + err = wait_for_completion_interruptible(&req->completion); +@@ -684,8 +684,8 @@ static void __resend_generic_request(str + + for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { + req = rb_entry(p, struct ceph_mon_generic_request, node); +- ceph_con_revoke(monc->con, req->request); +- ceph_con_send(monc->con, ceph_msg_get(req->request)); ++ ceph_con_revoke(&monc->con, req->request); ++ ceph_con_send(&monc->con, ceph_msg_get(req->request)); + } + } + +@@ -705,7 +705,7 @@ static void delayed_work(struct work_str + __close_session(monc); + __open_session(monc); /* continue hunting */ + } else { +- ceph_con_keepalive(monc->con); ++ ceph_con_keepalive(&monc->con); + + __validate_auth(monc); + +@@ -760,19 +760,16 @@ int ceph_monc_init(struct ceph_mon_clien + goto out; + + /* connection */ +- monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL); +- if (!monc->con) +- goto out_monmap; +- 
ceph_con_init(&monc->client->msgr, monc->con); +- monc->con->private = monc; +- monc->con->ops = &mon_con_ops; ++ ceph_con_init(&monc->client->msgr, &monc->con); ++ monc->con.private = monc; ++ monc->con.ops = &mon_con_ops; + + /* authentication */ + monc->auth = ceph_auth_init(cl->options->name, + cl->options->key); + if (IS_ERR(monc->auth)) { + err = PTR_ERR(monc->auth); +- goto out_con; ++ goto out_monmap; + } + monc->auth->want_keys = + CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | +@@ -824,8 +821,6 @@ out_subscribe_ack: + ceph_msg_put(monc->m_subscribe_ack); + out_auth: + ceph_auth_destroy(monc->auth); +-out_con: +- monc->con->ops->put(monc->con); + out_monmap: + kfree(monc->monmap); + out: +@@ -841,9 +836,7 @@ void ceph_monc_stop(struct ceph_mon_clie + mutex_lock(&monc->mutex); + __close_session(monc); + +- monc->con->private = NULL; +- monc->con->ops->put(monc->con); +- monc->con = NULL; ++ monc->con.private = NULL; + + mutex_unlock(&monc->mutex); + +@@ -1029,7 +1022,7 @@ static void mon_fault(struct ceph_connec + if (!monc->hunting) + pr_info("mon%d %s session lost, " + "hunting for new mon\n", monc->cur_mon, +- ceph_pr_addr(&monc->con->peer_addr.in_addr)); ++ ceph_pr_addr(&monc->con.peer_addr.in_addr)); + + __close_session(monc); + if (!monc->hunting) { diff --git a/queue-3.4/0048-libceph-drop-connection-refcounting-for-mon_client.patch b/queue-3.4/0048-libceph-drop-connection-refcounting-for-mon_client.patch new file mode 100644 index 00000000000..a8dfae48e1c --- /dev/null +++ b/queue-3.4/0048-libceph-drop-connection-refcounting-for-mon_client.patch @@ -0,0 +1,48 @@ +From 646a893f1d8346dc1b2826c684de99e5df37d5ed Mon Sep 17 00:00:00 2001 +From: Sage Weil +Date: Thu, 31 May 2012 20:27:50 -0700 +Subject: libceph: drop connection refcounting for mon_client + +From: Sage Weil + +(cherry picked from commit ec87ef4309d33bd9c87a53bb5152a86ae7a65f25) + +All references to the embedded ceph_connection come from the msgr +workqueue, which is drained prior to 
mon_client destruction. That +means we can ignore con refcounting entirely. + +Signed-off-by: Sage Weil +Reviewed-by: Alex Elder +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/mon_client.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +--- a/net/ceph/mon_client.c ++++ b/net/ceph/mon_client.c +@@ -1037,9 +1037,23 @@ out: + mutex_unlock(&monc->mutex); + } + ++/* ++ * We can ignore refcounting on the connection struct, as all references ++ * will come from the messenger workqueue, which is drained prior to ++ * mon_client destruction. ++ */ ++static struct ceph_connection *con_get(struct ceph_connection *con) ++{ ++ return con; ++} ++ ++static void con_put(struct ceph_connection *con) ++{ ++} ++ + static const struct ceph_connection_operations mon_con_ops = { +- .get = ceph_con_get, +- .put = ceph_con_put, ++ .get = con_get, ++ .put = con_put, + .dispatch = dispatch, + .fault = mon_fault, + .alloc_msg = mon_alloc_msg, diff --git a/queue-3.4/0049-libceph-init-monitor-connection-when-opening.patch b/queue-3.4/0049-libceph-init-monitor-connection-when-opening.patch new file mode 100644 index 00000000000..b1050daa816 --- /dev/null +++ b/queue-3.4/0049-libceph-init-monitor-connection-when-opening.patch @@ -0,0 +1,64 @@ +From efea1a38cd969f4b52ec5cc468d8143bb496efc2 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Sat, 26 May 2012 23:26:43 -0500 +Subject: libceph: init monitor connection when opening + +From: Alex Elder + +(cherry picked from commit 20581c1faf7b15ae1f8b80c0ec757877b0b53151) + +Hold off initializing a monitor client's connection until just +before it gets opened for use. 
+ +Signed-off-by: Alex Elder +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/mon_client.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +--- a/net/ceph/mon_client.c ++++ b/net/ceph/mon_client.c +@@ -119,6 +119,7 @@ static void __close_session(struct ceph_ + dout("__close_session closing mon%d\n", monc->cur_mon); + ceph_con_revoke(&monc->con, monc->m_auth); + ceph_con_close(&monc->con); ++ monc->con.private = NULL; + monc->cur_mon = -1; + monc->pending_auth = 0; + ceph_auth_reset(monc->auth); +@@ -141,9 +142,13 @@ static int __open_session(struct ceph_mo + monc->sub_renew_after = jiffies; /* i.e., expired */ + monc->want_next_osdmap = !!monc->want_next_osdmap; + +- dout("open_session mon%d opening\n", monc->cur_mon); ++ ceph_con_init(&monc->client->msgr, &monc->con); ++ monc->con.private = monc; ++ monc->con.ops = &mon_con_ops; + monc->con.peer_name.type = CEPH_ENTITY_TYPE_MON; + monc->con.peer_name.num = cpu_to_le64(monc->cur_mon); ++ ++ dout("open_session mon%d opening\n", monc->cur_mon); + ceph_con_open(&monc->con, + &monc->monmap->mon_inst[monc->cur_mon].addr); + +@@ -760,10 +765,6 @@ int ceph_monc_init(struct ceph_mon_clien + goto out; + + /* connection */ +- ceph_con_init(&monc->client->msgr, &monc->con); +- monc->con.private = monc; +- monc->con.ops = &mon_con_ops; +- + /* authentication */ + monc->auth = ceph_auth_init(cl->options->name, + cl->options->key); +@@ -836,8 +837,6 @@ void ceph_monc_stop(struct ceph_mon_clie + mutex_lock(&monc->mutex); + __close_session(monc); + +- monc->con.private = NULL; +- + mutex_unlock(&monc->mutex); + + /* diff --git a/queue-3.4/0050-libceph-fully-initialize-connection-in-con_init.patch b/queue-3.4/0050-libceph-fully-initialize-connection-in-con_init.patch new file mode 100644 index 00000000000..82ec588d78c --- /dev/null +++ b/queue-3.4/0050-libceph-fully-initialize-connection-in-con_init.patch @@ -0,0 +1,116 @@ +From 449c48298a24fb8a48f6e0574f098ddfe6c81325 Mon Sep 17 
00:00:00 2001 +From: Alex Elder +Date: Sat, 26 May 2012 23:26:43 -0500 +Subject: libceph: fully initialize connection in con_init() + +From: Alex Elder + +(cherry picked from commit 1bfd89f4e6e1adc6a782d94aa5d4c53be1e404d7) + +Move the initialization of a ceph connection's private pointer, +operations vector pointer, and peer name information into +ceph_con_init(). Rearrange the arguments so the connection pointer +is first. Hide the byte-swapping of the peer entity number inside +ceph_con_init() + +Signed-off-by: Alex Elder +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/mds_client.c | 7 ++----- + include/linux/ceph/messenger.h | 6 ++++-- + net/ceph/messenger.c | 9 ++++++++- + net/ceph/mon_client.c | 8 +++----- + net/ceph/osd_client.c | 7 ++----- + 5 files changed, 19 insertions(+), 18 deletions(-) + +--- a/fs/ceph/mds_client.c ++++ b/fs/ceph/mds_client.c +@@ -394,11 +394,8 @@ static struct ceph_mds_session *register + s->s_seq = 0; + mutex_init(&s->s_mutex); + +- ceph_con_init(&mdsc->fsc->client->msgr, &s->s_con); +- s->s_con.private = s; +- s->s_con.ops = &mds_con_ops; +- s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; +- s->s_con.peer_name.num = cpu_to_le64(mds); ++ ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr, ++ CEPH_ENTITY_TYPE_MDS, mds); + + spin_lock_init(&s->s_gen_ttl_lock); + s->s_cap_gen = 0; +--- a/include/linux/ceph/messenger.h ++++ b/include/linux/ceph/messenger.h +@@ -219,8 +219,10 @@ extern void ceph_messenger_init(struct c + u32 required_features, + bool nocrc); + +-extern void ceph_con_init(struct ceph_messenger *msgr, +- struct ceph_connection *con); ++extern void ceph_con_init(struct ceph_connection *con, void *private, ++ const struct ceph_connection_operations *ops, ++ struct ceph_messenger *msgr, __u8 entity_type, ++ __u64 entity_num); + extern void ceph_con_open(struct ceph_connection *con, + struct ceph_entity_addr *addr); + extern bool ceph_con_opened(struct ceph_connection *con); +--- 
a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -521,15 +521,22 @@ void ceph_con_put(struct ceph_connection + /* + * initialize a new connection. + */ +-void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) ++void ceph_con_init(struct ceph_connection *con, void *private, ++ const struct ceph_connection_operations *ops, ++ struct ceph_messenger *msgr, __u8 entity_type, __u64 entity_num) + { + dout("con_init %p\n", con); + memset(con, 0, sizeof(*con)); ++ con->private = private; ++ con->ops = ops; + atomic_set(&con->nref, 1); + con->msgr = msgr; + + con_sock_state_init(con); + ++ con->peer_name.type = (__u8) entity_type; ++ con->peer_name.num = cpu_to_le64(entity_num); ++ + mutex_init(&con->mutex); + INIT_LIST_HEAD(&con->out_queue); + INIT_LIST_HEAD(&con->out_sent); +--- a/net/ceph/mon_client.c ++++ b/net/ceph/mon_client.c +@@ -142,11 +142,9 @@ static int __open_session(struct ceph_mo + monc->sub_renew_after = jiffies; /* i.e., expired */ + monc->want_next_osdmap = !!monc->want_next_osdmap; + +- ceph_con_init(&monc->client->msgr, &monc->con); +- monc->con.private = monc; +- monc->con.ops = &mon_con_ops; +- monc->con.peer_name.type = CEPH_ENTITY_TYPE_MON; +- monc->con.peer_name.num = cpu_to_le64(monc->cur_mon); ++ ceph_con_init(&monc->con, monc, &mon_con_ops, ++ &monc->client->msgr, ++ CEPH_ENTITY_TYPE_MON, monc->cur_mon); + + dout("open_session mon%d opening\n", monc->cur_mon); + ceph_con_open(&monc->con, +--- a/net/ceph/osd_client.c ++++ b/net/ceph/osd_client.c +@@ -640,11 +640,8 @@ static struct ceph_osd *create_osd(struc + INIT_LIST_HEAD(&osd->o_osd_lru); + osd->o_incarnation = 1; + +- ceph_con_init(&osdc->client->msgr, &osd->o_con); +- osd->o_con.private = osd; +- osd->o_con.ops = &osd_con_ops; +- osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD; +- osd->o_con.peer_name.num = cpu_to_le64(onum); ++ ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr, ++ CEPH_ENTITY_TYPE_OSD, onum); + + 
INIT_LIST_HEAD(&osd->o_keepalive_item); + return osd; diff --git a/queue-3.4/0051-libceph-tweak-ceph_alloc_msg.patch b/queue-3.4/0051-libceph-tweak-ceph_alloc_msg.patch new file mode 100644 index 00000000000..00c620f1732 --- /dev/null +++ b/queue-3.4/0051-libceph-tweak-ceph_alloc_msg.patch @@ -0,0 +1,171 @@ +From 3b865e1b62c6088a110cad70ea725f65775c0c5e Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Mon, 4 Jun 2012 14:43:32 -0500 +Subject: libceph: tweak ceph_alloc_msg() + +From: Alex Elder + +(cherry picked from commit 1c20f2d26795803fc4f5155fe4fca5717a5944b6) + +The function ceph_alloc_msg() is only used to allocate a message +that will be assigned to a connection's in_msg pointer. Rename the +function so this implied usage is more clear. + +In addition, make that assignment inside the function (again, since +that's precisely what it's intended to be used for). This allows us +to return what is now provided via the passed-in address of a "skip" +variable. The return type is now Boolean to be explicit that there +are only two possible outcomes. + +Make sure the result of an ->alloc_msg method call always sets the +value of *skip properly. 
+ +Signed-off-by: Alex Elder +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/messenger.c | 61 +++++++++++++++++++++++++++----------------------- + net/ceph/mon_client.c | 3 ++ + net/ceph/osd_client.c | 1 + 3 files changed, 38 insertions(+), 27 deletions(-) + +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -1659,9 +1659,8 @@ static int read_partial_message_section( + return 1; + } + +-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, +- struct ceph_msg_header *hdr, +- int *skip); ++static bool ceph_con_in_msg_alloc(struct ceph_connection *con, ++ struct ceph_msg_header *hdr); + + + static int read_partial_message_pages(struct ceph_connection *con, +@@ -1744,7 +1743,6 @@ static int read_partial_message(struct c + int ret; + unsigned front_len, middle_len, data_len; + bool do_datacrc = !con->msgr->nocrc; +- int skip; + u64 seq; + u32 crc; + +@@ -1797,9 +1795,7 @@ static int read_partial_message(struct c + if (!con->in_msg) { + dout("got hdr type %d front %d data %d\n", con->in_hdr.type, + con->in_hdr.front_len, con->in_hdr.data_len); +- skip = 0; +- con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); +- if (skip) { ++ if (ceph_con_in_msg_alloc(con, &con->in_hdr)) { + /* skip this message */ + dout("alloc_msg said skip message\n"); + BUG_ON(con->in_msg); +@@ -2581,46 +2577,57 @@ static int ceph_alloc_middle(struct ceph + } + + /* +- * Generic message allocator, for incoming messages. ++ * Allocate a message for receiving an incoming message on a ++ * connection, and save the result in con->in_msg. Uses the ++ * connection's private alloc_msg op if available. ++ * ++ * Returns true if the message should be skipped, false otherwise. ++ * If true is returned (skip message), con->in_msg will be NULL. ++ * If false is returned, con->in_msg will contain a pointer to the ++ * newly-allocated message, or NULL in case of memory exhaustion. 
+ */ +-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, +- struct ceph_msg_header *hdr, +- int *skip) ++static bool ceph_con_in_msg_alloc(struct ceph_connection *con, ++ struct ceph_msg_header *hdr) + { + int type = le16_to_cpu(hdr->type); + int front_len = le32_to_cpu(hdr->front_len); + int middle_len = le32_to_cpu(hdr->middle_len); +- struct ceph_msg *msg = NULL; + int ret; + ++ BUG_ON(con->in_msg != NULL); ++ + if (con->ops->alloc_msg) { ++ int skip = 0; ++ + mutex_unlock(&con->mutex); +- msg = con->ops->alloc_msg(con, hdr, skip); ++ con->in_msg = con->ops->alloc_msg(con, hdr, &skip); + mutex_lock(&con->mutex); +- if (!msg || *skip) +- return NULL; ++ if (skip) ++ con->in_msg = NULL; ++ ++ if (!con->in_msg) ++ return skip != 0; + } +- if (!msg) { +- *skip = 0; +- msg = ceph_msg_new(type, front_len, GFP_NOFS, false); +- if (!msg) { ++ if (!con->in_msg) { ++ con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false); ++ if (!con->in_msg) { + pr_err("unable to allocate msg type %d len %d\n", + type, front_len); +- return NULL; ++ return false; + } +- msg->page_alignment = le16_to_cpu(hdr->data_off); ++ con->in_msg->page_alignment = le16_to_cpu(hdr->data_off); + } +- memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); ++ memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); + +- if (middle_len && !msg->middle) { +- ret = ceph_alloc_middle(con, msg); ++ if (middle_len && !con->in_msg->middle) { ++ ret = ceph_alloc_middle(con, con->in_msg); + if (ret < 0) { +- ceph_msg_put(msg); +- return NULL; ++ ceph_msg_put(con->in_msg); ++ con->in_msg = NULL; + } + } + +- return msg; ++ return false; + } + + +--- a/net/ceph/mon_client.c ++++ b/net/ceph/mon_client.c +@@ -442,6 +442,7 @@ static struct ceph_msg *get_generic_repl + m = NULL; + } else { + dout("get_generic_reply %lld got %p\n", tid, req->reply); ++ *skip = 0; + m = ceph_msg_get(req->reply); + /* + * we don't need to track the connection reading into +@@ -990,6 +991,8 @@ static struct 
ceph_msg *mon_alloc_msg(st + case CEPH_MSG_MDS_MAP: + case CEPH_MSG_OSD_MAP: + m = ceph_msg_new(type, front_len, GFP_NOFS, false); ++ if (!m) ++ return NULL; /* ENOMEM--return skip == 0 */ + break; + } + +--- a/net/ceph/osd_client.c ++++ b/net/ceph/osd_client.c +@@ -2077,6 +2077,7 @@ static struct ceph_msg *alloc_msg(struct + int type = le16_to_cpu(hdr->type); + int front = le32_to_cpu(hdr->front_len); + ++ *skip = 0; + switch (type) { + case CEPH_MSG_OSD_MAP: + case CEPH_MSG_WATCH_NOTIFY: diff --git a/queue-3.4/0052-libceph-have-messages-point-to-their-connection.patch b/queue-3.4/0052-libceph-have-messages-point-to-their-connection.patch new file mode 100644 index 00000000000..d76394ca6ba --- /dev/null +++ b/queue-3.4/0052-libceph-have-messages-point-to-their-connection.patch @@ -0,0 +1,154 @@ +From 48588cda937cf200d7cf89fbb74e59449a389de8 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Fri, 1 Jun 2012 14:56:43 -0500 +Subject: libceph: have messages point to their connection + +From: Alex Elder + +(cherry picked from commit 38941f8031bf042dba3ced6394ba3a3b16c244ea) + +When a ceph message is queued for sending it is placed on a list of +pending messages (ceph_connection->out_queue). When they are +actually sent over the wire, they are moved from that list to +another (ceph_connection->out_sent). When acknowledgement for the +message is received, it is removed from the sent messages list. + +During that entire time the message is "in the possession" of a +single ceph connection. Keep track of that connection in the +message. This will be used in the next patch (and is a helpful +bit of information for debugging anyway). 
+ +Signed-off-by: Alex Elder +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ceph/messenger.h | 3 +++ + net/ceph/messenger.c | 27 +++++++++++++++++++++++++-- + 2 files changed, 28 insertions(+), 2 deletions(-) + +--- a/include/linux/ceph/messenger.h ++++ b/include/linux/ceph/messenger.h +@@ -77,7 +77,10 @@ struct ceph_msg { + unsigned nr_pages; /* size of page array */ + unsigned page_alignment; /* io offset in first page */ + struct ceph_pagelist *pagelist; /* instead of pages */ ++ ++ struct ceph_connection *con; + struct list_head list_head; ++ + struct kref kref; + struct bio *bio; /* instead of pages/pagelist */ + struct bio *bio_iter; /* bio iterator */ +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -414,6 +414,9 @@ static int con_close_socket(struct ceph_ + static void ceph_msg_remove(struct ceph_msg *msg) + { + list_del_init(&msg->list_head); ++ BUG_ON(msg->con == NULL); ++ msg->con = NULL; ++ + ceph_msg_put(msg); + } + static void ceph_msg_remove_list(struct list_head *head) +@@ -433,6 +436,8 @@ static void reset_connection(struct ceph + ceph_msg_remove_list(&con->out_sent); + + if (con->in_msg) { ++ BUG_ON(con->in_msg->con != con); ++ con->in_msg->con = NULL; + ceph_msg_put(con->in_msg); + con->in_msg = NULL; + } +@@ -625,8 +630,10 @@ static void prepare_write_message(struct + &con->out_temp_ack); + } + ++ BUG_ON(list_empty(&con->out_queue)); + m = list_first_entry(&con->out_queue, struct ceph_msg, list_head); + con->out_msg = m; ++ BUG_ON(m->con != con); + + /* put message on sent list */ + ceph_msg_get(m); +@@ -1810,6 +1817,8 @@ static int read_partial_message(struct c + "error allocating memory for incoming message"; + return -ENOMEM; + } ++ ++ BUG_ON(con->in_msg->con != con); + m = con->in_msg; + m->front.iov_len = 0; /* haven't read it yet */ + if (m->middle) +@@ -1905,6 +1914,8 @@ static void process_message(struct ceph_ + { + struct ceph_msg *msg; + ++ BUG_ON(con->in_msg->con != con); ++ con->in_msg->con 
= NULL; + msg = con->in_msg; + con->in_msg = NULL; + +@@ -2264,6 +2275,8 @@ static void ceph_fault(struct ceph_conne + con_close_socket(con); + + if (con->in_msg) { ++ BUG_ON(con->in_msg->con != con); ++ con->in_msg->con = NULL; + ceph_msg_put(con->in_msg); + con->in_msg = NULL; + } +@@ -2382,6 +2395,8 @@ void ceph_con_send(struct ceph_connectio + + /* queue */ + mutex_lock(&con->mutex); ++ BUG_ON(msg->con != NULL); ++ msg->con = con; + BUG_ON(!list_empty(&msg->list_head)); + list_add_tail(&msg->list_head, &con->out_queue); + dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg, +@@ -2407,13 +2422,16 @@ void ceph_con_revoke(struct ceph_connect + { + mutex_lock(&con->mutex); + if (!list_empty(&msg->list_head)) { +- dout("con_revoke %p msg %p - was on queue\n", con, msg); ++ dout("%s %p msg %p - was on queue\n", __func__, con, msg); + list_del_init(&msg->list_head); ++ BUG_ON(msg->con == NULL); ++ msg->con = NULL; ++ + ceph_msg_put(msg); + msg->hdr.seq = 0; + } + if (con->out_msg == msg) { +- dout("con_revoke %p msg %p - was sending\n", con, msg); ++ dout("%s %p msg %p - was sending\n", __func__, con, msg); + con->out_msg = NULL; + if (con->out_kvec_is_msg) { + con->out_skip = con->out_kvec_bytes; +@@ -2482,6 +2500,8 @@ struct ceph_msg *ceph_msg_new(int type, + if (m == NULL) + goto out; + kref_init(&m->kref); ++ ++ m->con = NULL; + INIT_LIST_HEAD(&m->list_head); + + m->hdr.tid = 0; +@@ -2602,6 +2622,8 @@ static bool ceph_con_in_msg_alloc(struct + mutex_unlock(&con->mutex); + con->in_msg = con->ops->alloc_msg(con, hdr, &skip); + mutex_lock(&con->mutex); ++ if (con->in_msg) ++ con->in_msg->con = con; + if (skip) + con->in_msg = NULL; + +@@ -2615,6 +2637,7 @@ static bool ceph_con_in_msg_alloc(struct + type, front_len); + return false; + } ++ con->in_msg->con = con; + con->in_msg->page_alignment = le16_to_cpu(hdr->data_off); + } + memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); diff --git 
a/queue-3.4/0053-libceph-have-messages-take-a-connection-reference.patch b/queue-3.4/0053-libceph-have-messages-take-a-connection-reference.patch new file mode 100644 index 00000000000..88242d400e1 --- /dev/null +++ b/queue-3.4/0053-libceph-have-messages-take-a-connection-reference.patch @@ -0,0 +1,174 @@ +From 898bf051066aaecc79487425b6614fb8e0efca4a Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Mon, 4 Jun 2012 14:43:33 -0500 +Subject: libceph: have messages take a connection reference + +From: Alex Elder + +(cherry picked from commit 92ce034b5a740046cc643a21ea21eaad589e0043) + +There are essentially two types of ceph messages: incoming and +outgoing. Outgoing messages are always allocated via ceph_msg_new(), +and at the time of their allocation they are not associated with any +particular connection. Incoming messages are always allocated via +ceph_con_in_msg_alloc(), and they are initially associated with the +connection from which incoming data will be placed into the message. + +When an outgoing message gets sent, it becomes associated with a +connection and remains that way until the message is successfully +sent. The association of an incoming message goes away at the point +it is sent to an upper layer via a con->ops->dispatch method. + +This patch implements reference counting for all ceph messages, such +that every message holds a reference (and a pointer) to a connection +if and only if it is associated with that connection (as described +above). + +For background, here is an explanation of the ceph message +lifecycle, emphasizing when an association exists between a message +and a connection. + +Outgoing Messages +An outgoing message is "owned" by its allocator, from the time it is +allocated in ceph_msg_new() up to the point it gets queued for +sending in ceph_con_send(). Prior to that point the message's +msg->con pointer is null; at the point it is queued for sending its +message pointer is assigned to refer to the connection. 
At that +time the message is inserted into a connection's out_queue list. + +When a message on the out_queue list has been sent to the socket +layer to be put on the wire, it is transferred out of that list and +into the connection's out_sent list. At that point it is still owned +by the connection, and will remain so until an acknowledgement is +received from the recipient that indicates the message was +successfully transferred. When such an acknowledgement is received +(in process_ack()), the message is removed from its list (in +ceph_msg_remove()), at which point it is no longer associated with +the connection. + +So basically, any time a message is on one of a connection's lists, +it is associated with that connection. Reference counting outgoing +messages can thus be done at the points a message is added to the +out_queue (in ceph_con_send()) and the point it is removed from +either of its two lists (in ceph_msg_remove())--at which point its +connection pointer becomes null. + +Incoming Messages +When an incoming message on a connection is getting read (in +read_partial_message()) and there is no message in con->in_msg, +a new one is allocated using ceph_con_in_msg_alloc(). At that +point the message is associated with the connection. Once that +message has been completely and successfully read, it is passed to +upper layer code using the connection's con->ops->dispatch method. +At that point the association between the message and the connection +no longer exists. + +Reference counting of connections for incoming messages can be done +by taking a reference to the connection when the message gets +allocated, and releasing that reference when it gets handed off +using the dispatch method. + +We should never fail to get a connection reference for a +message, since the caller should already hold one. 
+ +Signed-off-by: Alex Elder +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/messenger.c | 24 ++++++++++++++++++------ + 1 file changed, 18 insertions(+), 6 deletions(-) + +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -415,6 +415,7 @@ static void ceph_msg_remove(struct ceph_ + { + list_del_init(&msg->list_head); + BUG_ON(msg->con == NULL); ++ ceph_con_put(msg->con); + msg->con = NULL; + + ceph_msg_put(msg); +@@ -440,6 +441,7 @@ static void reset_connection(struct ceph + con->in_msg->con = NULL; + ceph_msg_put(con->in_msg); + con->in_msg = NULL; ++ ceph_con_put(con->in_msg->con); + } + + con->connect_seq = 0; +@@ -1918,6 +1920,7 @@ static void process_message(struct ceph_ + con->in_msg->con = NULL; + msg = con->in_msg; + con->in_msg = NULL; ++ ceph_con_put(con); + + /* if first message, set peer_name */ + if (con->peer_name.type == 0) +@@ -2279,6 +2282,7 @@ static void ceph_fault(struct ceph_conne + con->in_msg->con = NULL; + ceph_msg_put(con->in_msg); + con->in_msg = NULL; ++ ceph_con_put(con); + } + + /* Requeue anything that hasn't been acked */ +@@ -2395,8 +2399,11 @@ void ceph_con_send(struct ceph_connectio + + /* queue */ + mutex_lock(&con->mutex); ++ + BUG_ON(msg->con != NULL); +- msg->con = con; ++ msg->con = ceph_con_get(con); ++ BUG_ON(msg->con == NULL); ++ + BUG_ON(!list_empty(&msg->list_head)); + list_add_tail(&msg->list_head, &con->out_queue); + dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg, +@@ -2425,10 +2432,11 @@ void ceph_con_revoke(struct ceph_connect + dout("%s %p msg %p - was on queue\n", __func__, con, msg); + list_del_init(&msg->list_head); + BUG_ON(msg->con == NULL); ++ ceph_con_put(msg->con); + msg->con = NULL; ++ msg->hdr.seq = 0; + + ceph_msg_put(msg); +- msg->hdr.seq = 0; + } + if (con->out_msg == msg) { + dout("%s %p msg %p - was sending\n", __func__, con, msg); +@@ -2437,8 +2445,9 @@ void ceph_con_revoke(struct ceph_connect + con->out_skip = con->out_kvec_bytes; + con->out_kvec_is_msg 
= false; + } +- ceph_msg_put(msg); + msg->hdr.seq = 0; ++ ++ ceph_msg_put(msg); + } + mutex_unlock(&con->mutex); + } +@@ -2622,8 +2631,10 @@ static bool ceph_con_in_msg_alloc(struct + mutex_unlock(&con->mutex); + con->in_msg = con->ops->alloc_msg(con, hdr, &skip); + mutex_lock(&con->mutex); +- if (con->in_msg) +- con->in_msg->con = con; ++ if (con->in_msg) { ++ con->in_msg->con = ceph_con_get(con); ++ BUG_ON(con->in_msg->con == NULL); ++ } + if (skip) + con->in_msg = NULL; + +@@ -2637,7 +2648,8 @@ static bool ceph_con_in_msg_alloc(struct + type, front_len); + return false; + } +- con->in_msg->con = con; ++ con->in_msg->con = ceph_con_get(con); ++ BUG_ON(con->in_msg->con == NULL); + con->in_msg->page_alignment = le16_to_cpu(hdr->data_off); + } + memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); diff --git a/queue-3.4/0054-libceph-make-ceph_con_revoke-a-msg-operation.patch b/queue-3.4/0054-libceph-make-ceph_con_revoke-a-msg-operation.patch new file mode 100644 index 00000000000..783fa3f1e27 --- /dev/null +++ b/queue-3.4/0054-libceph-make-ceph_con_revoke-a-msg-operation.patch @@ -0,0 +1,121 @@ +From 0ca876aefb93f1bda1b194af80ed90def4a21768 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Fri, 1 Jun 2012 14:56:43 -0500 +Subject: libceph: make ceph_con_revoke() a msg operation + +From: Alex Elder + +(cherry picked from commit 6740a845b2543cc46e1902ba21bac743fbadd0dc) + +ceph_con_revoke() is passed both a message and a ceph connection. +Now that any message associated with a connection holds a pointer +to that connection, there's no need to provide the connection when +revoking a message. + +This has the added benefit of precluding the possibility of the +providing the wrong connection pointer. If the message's connection +pointer is null, it is not being tracked by any connection, so +revoking it is a no-op. This is supported as a convenience for +upper layers, so they can revoke a message that is not actually +"in flight." 
+ +Rename the function ceph_msg_revoke() to reflect that it is really +an operation on a message, not a connection. + +Signed-off-by: Alex Elder +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ceph/messenger.h | 3 ++- + net/ceph/messenger.c | 7 ++++++- + net/ceph/mon_client.c | 8 ++++---- + net/ceph/osd_client.c | 4 ++-- + 4 files changed, 14 insertions(+), 8 deletions(-) + +--- a/include/linux/ceph/messenger.h ++++ b/include/linux/ceph/messenger.h +@@ -231,7 +231,8 @@ extern void ceph_con_open(struct ceph_co + extern bool ceph_con_opened(struct ceph_connection *con); + extern void ceph_con_close(struct ceph_connection *con); + extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); +-extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); ++ ++extern void ceph_msg_revoke(struct ceph_msg *msg); + extern void ceph_con_revoke_message(struct ceph_connection *con, + struct ceph_msg *msg); + extern void ceph_con_keepalive(struct ceph_connection *con); +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -2425,8 +2425,13 @@ EXPORT_SYMBOL(ceph_con_send); + /* + * Revoke a message that was previously queued for send + */ +-void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg) ++void ceph_msg_revoke(struct ceph_msg *msg) + { ++ struct ceph_connection *con = msg->con; ++ ++ if (!con) ++ return; /* Message not in our possession */ ++ + mutex_lock(&con->mutex); + if (!list_empty(&msg->list_head)) { + dout("%s %p msg %p - was on queue\n", __func__, con, msg); +--- a/net/ceph/mon_client.c ++++ b/net/ceph/mon_client.c +@@ -106,7 +106,7 @@ static void __send_prepared_auth_request + monc->pending_auth = 1; + monc->m_auth->front.iov_len = len; + monc->m_auth->hdr.front_len = cpu_to_le32(len); +- ceph_con_revoke(&monc->con, monc->m_auth); ++ ceph_msg_revoke(monc->m_auth); + ceph_msg_get(monc->m_auth); /* keep our ref */ + ceph_con_send(&monc->con, monc->m_auth); + } +@@ -117,7 
+117,7 @@ static void __send_prepared_auth_request + static void __close_session(struct ceph_mon_client *monc) + { + dout("__close_session closing mon%d\n", monc->cur_mon); +- ceph_con_revoke(&monc->con, monc->m_auth); ++ ceph_msg_revoke(monc->m_auth); + ceph_con_close(&monc->con); + monc->con.private = NULL; + monc->cur_mon = -1; +@@ -229,7 +229,7 @@ static void __send_subscribe(struct ceph + + msg->front.iov_len = p - msg->front.iov_base; + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); +- ceph_con_revoke(&monc->con, msg); ++ ceph_msg_revoke(msg); + ceph_con_send(&monc->con, ceph_msg_get(msg)); + + monc->sub_sent = jiffies | 1; /* never 0 */ +@@ -688,7 +688,7 @@ static void __resend_generic_request(str + + for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { + req = rb_entry(p, struct ceph_mon_generic_request, node); +- ceph_con_revoke(&monc->con, req->request); ++ ceph_msg_revoke(req->request); + ceph_con_send(&monc->con, ceph_msg_get(req->request)); + } + } +--- a/net/ceph/osd_client.c ++++ b/net/ceph/osd_client.c +@@ -852,7 +852,7 @@ static void __unregister_request(struct + + if (req->r_osd) { + /* make sure the original request isn't in flight. 
*/ +- ceph_con_revoke(&req->r_osd->o_con, req->r_request); ++ ceph_msg_revoke(req->r_request); + + list_del_init(&req->r_osd_item); + if (list_empty(&req->r_osd->o_requests) && +@@ -879,7 +879,7 @@ static void __unregister_request(struct + static void __cancel_request(struct ceph_osd_request *req) + { + if (req->r_sent && req->r_osd) { +- ceph_con_revoke(&req->r_osd->o_con, req->r_request); ++ ceph_msg_revoke(req->r_request); + req->r_sent = 0; + } + } diff --git a/queue-3.4/0055-libceph-make-ceph_con_revoke_message-a-msg-op.patch b/queue-3.4/0055-libceph-make-ceph_con_revoke_message-a-msg-op.patch new file mode 100644 index 00000000000..4c9cee02ca5 --- /dev/null +++ b/queue-3.4/0055-libceph-make-ceph_con_revoke_message-a-msg-op.patch @@ -0,0 +1,120 @@ +From ede3074a05d4669f799c8887291c940e8f86849b Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Fri, 1 Jun 2012 14:56:43 -0500 +Subject: libceph: make ceph_con_revoke_message() a msg op + +From: Alex Elder + +(cherry picked from commit 8921d114f5574c6da2cdd00749d185633ecf88f3) + +ceph_con_revoke_message() is passed both a message and a ceph +connection. A ceph_msg allocated for incoming messages on a +connection always has a pointer to that connection, so there's no +need to provide the connection when revoking such a message. + +Note that the existing logic does not preclude the message supplied +being a null/bogus message pointer. The only user of this interface +is the OSD client, and the only value an osd client passes is a +request's r_reply field. That is always non-null (except briefly in +an error path in ceph_osdc_alloc_request(), and that drops the +only reference so the request won't ever have a reply to revoke). +So we can safely assume the passed-in message is non-null, but add a +BUG_ON() to make it very obvious we are imposing this restriction. + +Rename the function ceph_msg_revoke_incoming() to reflect that it is +really an operation on an incoming message. 
+ +Signed-off-by: Alex Elder +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ceph/messenger.h | 4 ++-- + net/ceph/messenger.c | 22 ++++++++++++++++------ + net/ceph/osd_client.c | 9 ++++----- + 3 files changed, 22 insertions(+), 13 deletions(-) + +--- a/include/linux/ceph/messenger.h ++++ b/include/linux/ceph/messenger.h +@@ -233,8 +233,8 @@ extern void ceph_con_close(struct ceph_c + extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); + + extern void ceph_msg_revoke(struct ceph_msg *msg); +-extern void ceph_con_revoke_message(struct ceph_connection *con, +- struct ceph_msg *msg); ++extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); ++ + extern void ceph_con_keepalive(struct ceph_connection *con); + extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); + extern void ceph_con_put(struct ceph_connection *con); +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -2460,17 +2460,27 @@ void ceph_msg_revoke(struct ceph_msg *ms + /* + * Revoke a message that we may be reading data into + */ +-void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) ++void ceph_msg_revoke_incoming(struct ceph_msg *msg) + { ++ struct ceph_connection *con; ++ ++ BUG_ON(msg == NULL); ++ if (!msg->con) { ++ dout("%s msg %p null con\n", __func__, msg); ++ ++ return; /* Message not in our possession */ ++ } ++ ++ con = msg->con; + mutex_lock(&con->mutex); +- if (con->in_msg && con->in_msg == msg) { ++ if (con->in_msg == msg) { + unsigned front_len = le32_to_cpu(con->in_hdr.front_len); + unsigned middle_len = le32_to_cpu(con->in_hdr.middle_len); + unsigned data_len = le32_to_cpu(con->in_hdr.data_len); + + /* skip rest of message */ +- dout("con_revoke_pages %p msg %p revoked\n", con, msg); +- con->in_base_pos = con->in_base_pos - ++ dout("%s %p msg %p revoked\n", __func__, con, msg); ++ con->in_base_pos = con->in_base_pos - + sizeof(struct ceph_msg_header) - + front_len - + 
middle_len - +@@ -2481,8 +2491,8 @@ void ceph_con_revoke_message(struct ceph + con->in_tag = CEPH_MSGR_TAG_READY; + con->in_seq++; + } else { +- dout("con_revoke_pages %p msg %p pages %p no-op\n", +- con, con->in_msg, msg); ++ dout("%s %p in_msg %p msg %p no-op\n", ++ __func__, con, con->in_msg, msg); + } + mutex_unlock(&con->mutex); + } +--- a/net/ceph/osd_client.c ++++ b/net/ceph/osd_client.c +@@ -140,10 +140,9 @@ void ceph_osdc_release_request(struct kr + if (req->r_request) + ceph_msg_put(req->r_request); + if (req->r_con_filling_msg) { +- dout("release_request revoking pages %p from con %p\n", ++ dout("%s revoking pages %p from con %p\n", __func__, + req->r_pages, req->r_con_filling_msg); +- ceph_con_revoke_message(req->r_con_filling_msg, +- req->r_reply); ++ ceph_msg_revoke_incoming(req->r_reply); + req->r_con_filling_msg->ops->put(req->r_con_filling_msg); + } + if (req->r_reply) +@@ -2022,9 +2021,9 @@ static struct ceph_msg *get_reply(struct + } + + if (req->r_con_filling_msg) { +- dout("get_reply revoking msg %p from old con %p\n", ++ dout("%s revoking msg %p from old con %p\n", __func__, + req->r_reply, req->r_con_filling_msg); +- ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); ++ ceph_msg_revoke_incoming(req->r_reply); + req->r_con_filling_msg->ops->put(req->r_con_filling_msg); + req->r_con_filling_msg = NULL; + } diff --git a/queue-3.4/0056-libceph-fix-overflow-in-__decode_pool_names.patch b/queue-3.4/0056-libceph-fix-overflow-in-__decode_pool_names.patch new file mode 100644 index 00000000000..e6eeecd4316 --- /dev/null +++ b/queue-3.4/0056-libceph-fix-overflow-in-__decode_pool_names.patch @@ -0,0 +1,51 @@ +From d87d591772b2956b9ac9e25eb499366100d2c4a8 Mon Sep 17 00:00:00 2001 +From: Xi Wang +Date: Wed, 6 Jun 2012 19:35:55 -0500 +Subject: libceph: fix overflow in __decode_pool_names() + +From: Xi Wang + +(cherry picked from commit ad3b904c07dfa88603689bf9a67bffbb9b99beb5) + +`len' is read from network and thus needs validation. 
Otherwise a +large `len' would cause out-of-bounds access via the memcpy() call. +In addition, len = 0xffffffff would overflow the kmalloc() size, +leading to out-of-bounds write. + +This patch adds a check of `len' via ceph_decode_need(). Also use +kstrndup rather than kmalloc/memcpy. + +[elder@inktank.com: added -ENOMEM return for null kstrndup() result] + +Signed-off-by: Xi Wang +Reviewed-by: Alex Elder +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/osdmap.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/net/ceph/osdmap.c ++++ b/net/ceph/osdmap.c +@@ -495,15 +495,16 @@ static int __decode_pool_names(void **p, + ceph_decode_32_safe(p, end, pool, bad); + ceph_decode_32_safe(p, end, len, bad); + dout(" pool %d len %d\n", pool, len); ++ ceph_decode_need(p, end, len, bad); + pi = __lookup_pg_pool(&map->pg_pools, pool); + if (pi) { ++ char *name = kstrndup(*p, len, GFP_NOFS); ++ ++ if (!name) ++ return -ENOMEM; + kfree(pi->name); +- pi->name = kmalloc(len + 1, GFP_NOFS); +- if (pi->name) { +- memcpy(pi->name, *p, len); +- pi->name[len] = '\0'; +- dout(" name is %s\n", pi->name); +- } ++ pi->name = name; ++ dout(" name is %s\n", pi->name); + } + *p += len; + } diff --git a/queue-3.4/0057-libceph-fix-overflow-in-osdmap_decode.patch b/queue-3.4/0057-libceph-fix-overflow-in-osdmap_decode.patch new file mode 100644 index 00000000000..0810782fe27 --- /dev/null +++ b/queue-3.4/0057-libceph-fix-overflow-in-osdmap_decode.patch @@ -0,0 +1,32 @@ +From cc725c099f905095dfa2fe50c46575096ff0052d Mon Sep 17 00:00:00 2001 +From: Xi Wang +Date: Wed, 6 Jun 2012 19:35:55 -0500 +Subject: libceph: fix overflow in osdmap_decode() + +From: Xi Wang + +(cherry picked from commit e91a9b639a691e0982088b5954eaafb5a25c8f1c) + +On 32-bit systems, a large `n' would overflow `n * sizeof(u32)' and bypass +the check ceph_decode_need(p, end, n * sizeof(u32), bad). It would also +overflow the subsequent kmalloc() size, leading to out-of-bounds write. 
+ +Signed-off-by: Xi Wang +Reviewed-by: Alex Elder +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/osdmap.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/ceph/osdmap.c ++++ b/net/ceph/osdmap.c +@@ -674,6 +674,9 @@ struct ceph_osdmap *osdmap_decode(void * + ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); + ceph_decode_copy(p, &pgid, sizeof(pgid)); + n = ceph_decode_32(p); ++ err = -EINVAL; ++ if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) ++ goto bad; + ceph_decode_need(p, end, n * sizeof(u32), bad); + err = -ENOMEM; + pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS); diff --git a/queue-3.4/0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch b/queue-3.4/0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch new file mode 100644 index 00000000000..22353a21a38 --- /dev/null +++ b/queue-3.4/0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch @@ -0,0 +1,34 @@ +From 1edb6147d0adbb0d51bed7c7432f59bbc41f8c82 Mon Sep 17 00:00:00 2001 +From: Xi Wang +Date: Wed, 6 Jun 2012 19:35:55 -0500 +Subject: libceph: fix overflow in osdmap_apply_incremental() + +From: Xi Wang + +(cherry picked from commit a5506049500b30dbc5edb4d07a3577477c1f3643) + +On 32-bit systems, a large `pglen' would overflow `pglen*sizeof(u32)' +and bypass the check ceph_decode_need(p, end, pglen*sizeof(u32), bad). +It would also overflow the subsequent kmalloc() size, leading to +out-of-bounds write. 
+ +Signed-off-by: Xi Wang +Reviewed-by: Alex Elder +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/osdmap.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ceph/osdmap.c ++++ b/net/ceph/osdmap.c +@@ -900,6 +900,10 @@ struct ceph_osdmap *osdmap_apply_increme + (void) __remove_pg_mapping(&map->pg_temp, pgid); + + /* insert */ ++ if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) { ++ err = -EINVAL; ++ goto bad; ++ } + pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS); + if (!pg) { + err = -ENOMEM; diff --git a/queue-3.4/0059-libceph-transition-socket-state-prior-to-actual-conn.patch b/queue-3.4/0059-libceph-transition-socket-state-prior-to-actual-conn.patch new file mode 100644 index 00000000000..cd5d0f8c31e --- /dev/null +++ b/queue-3.4/0059-libceph-transition-socket-state-prior-to-actual-conn.patch @@ -0,0 +1,43 @@ +From aa868bb25ad02c63c69783a9ffa1f6f1d3e98a5d Mon Sep 17 00:00:00 2001 +From: Sage Weil +Date: Sat, 9 Jun 2012 14:19:21 -0700 +Subject: libceph: transition socket state prior to actual connect + +From: Sage Weil + +(cherry picked from commit 89a86be0ce20022f6ede8bccec078dbb3d63caaa) + +Once we call ->connect(), we are racing against the actual +connection, and a subsequent transition from CONNECTING -> +CONNECTED. Set the state to CONNECTING before that, under the +protection of the mutex, to avoid the race. + +This was introduced in 928443cd9644e7cfd46f687dbeffda2d1a357ff9, +with the original socket state code. 
+ +Signed-off-by: Sage Weil +Reviewed-by: Alex Elder +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/messenger.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -321,6 +321,7 @@ static int ceph_tcp_connect(struct ceph_ + + dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); + ++ con_sock_state_connecting(con); + ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), + O_NONBLOCK); + if (ret == -EINPROGRESS) { +@@ -336,8 +337,6 @@ static int ceph_tcp_connect(struct ceph_ + return ret; + } + con->sock = sock; +- con_sock_state_connecting(con); +- + return 0; + } + diff --git a/queue-3.4/series b/queue-3.4/series index a0104435621..a2f21cda4a2 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -92,3 +92,23 @@ selinux-fix-sel_netnode_insert-suspicious-rcu-dereference.patch 0037-ceph-check-PG_Private-flag-before-accessing-page-pri.patch 0038-libceph-eliminate-connection-state-DEAD.patch 0039-libceph-kill-bad_proto-ceph-connection-op.patch +0040-libceph-rename-socket-callbacks.patch +0041-libceph-rename-kvec_reset-and-kvec_add-functions.patch +0042-libceph-embed-ceph-messenger-structure-in-ceph_clien.patch +0043-libceph-start-separating-connection-flags-from-state.patch +0044-libceph-start-tracking-connection-socket-state.patch +0045-libceph-provide-osd-number-when-creating-osd.patch +0046-libceph-set-CLOSED-state-bit-in-con_init.patch +0047-libceph-embed-ceph-connection-structure-in-mon_clien.patch +0048-libceph-drop-connection-refcounting-for-mon_client.patch +0049-libceph-init-monitor-connection-when-opening.patch +0050-libceph-fully-initialize-connection-in-con_init.patch +0051-libceph-tweak-ceph_alloc_msg.patch +0052-libceph-have-messages-point-to-their-connection.patch +0053-libceph-have-messages-take-a-connection-reference.patch +0054-libceph-make-ceph_con_revoke-a-msg-operation.patch +0055-libceph-make-ceph_con_revoke_message-a-msg-op.patch 
+0056-libceph-fix-overflow-in-__decode_pool_names.patch +0057-libceph-fix-overflow-in-osdmap_decode.patch +0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch +0059-libceph-transition-socket-state-prior-to-actual-conn.patch -- 2.47.3