--- /dev/null
+From c08138f57dc37880110c91afcb7403f3d7a8aa3c Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Tue, 19 Jun 2012 08:52:33 -0500
+Subject: libceph: fix NULL dereference in reset_connection()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+(cherry picked from commit 26ce171915f348abd1f41da1ed139d93750d987f)
+
+We dereference "con->in_msg" on the line after it was set to NULL.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -440,7 +440,7 @@ static void reset_connection(struct ceph
+ con->in_msg->con = NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
+- ceph_con_put(con->in_msg->con);
++ ceph_con_put(con);
+ }
+
+ con->connect_seq = 0;
--- /dev/null
+From 6df35f27bf01c07b95aa9b93657f5b00b919e231 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Thu, 21 Jun 2012 12:47:08 -0700
+Subject: libceph: use con get/put methods
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 36eb71aa57e6a33d61fd90a2fd87f00c6844bc86)
+
+The ceph_con_get/put() helpers manipulate the embedded con ref
+count, which isn't used now that ceph_connections are embedded in
+other structures.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -414,7 +414,7 @@ static void ceph_msg_remove(struct ceph_
+ {
+ list_del_init(&msg->list_head);
+ BUG_ON(msg->con == NULL);
+- ceph_con_put(msg->con);
++ msg->con->ops->put(msg->con);
+ msg->con = NULL;
+
+ ceph_msg_put(msg);
+@@ -440,7 +440,7 @@ static void reset_connection(struct ceph
+ con->in_msg->con = NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
+- ceph_con_put(con);
++ con->ops->put(con);
+ }
+
+ con->connect_seq = 0;
+@@ -1919,7 +1919,7 @@ static void process_message(struct ceph_
+ con->in_msg->con = NULL;
+ msg = con->in_msg;
+ con->in_msg = NULL;
+- ceph_con_put(con);
++ con->ops->put(con);
+
+ /* if first message, set peer_name */
+ if (con->peer_name.type == 0)
+@@ -2281,7 +2281,7 @@ static void ceph_fault(struct ceph_conne
+ con->in_msg->con = NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
+- ceph_con_put(con);
++ con->ops->put(con);
+ }
+
+ /* Requeue anything that hasn't been acked */
+@@ -2400,7 +2400,7 @@ void ceph_con_send(struct ceph_connectio
+ mutex_lock(&con->mutex);
+
+ BUG_ON(msg->con != NULL);
+- msg->con = ceph_con_get(con);
++ msg->con = con->ops->get(con);
+ BUG_ON(msg->con == NULL);
+
+ BUG_ON(!list_empty(&msg->list_head));
+@@ -2436,7 +2436,7 @@ void ceph_msg_revoke(struct ceph_msg *ms
+ dout("%s %p msg %p - was on queue\n", __func__, con, msg);
+ list_del_init(&msg->list_head);
+ BUG_ON(msg->con == NULL);
+- ceph_con_put(msg->con);
++ msg->con->ops->put(msg->con);
+ msg->con = NULL;
+ msg->hdr.seq = 0;
+
+@@ -2646,7 +2646,7 @@ static bool ceph_con_in_msg_alloc(struct
+ con->in_msg = con->ops->alloc_msg(con, hdr, &skip);
+ mutex_lock(&con->mutex);
+ if (con->in_msg) {
+- con->in_msg->con = ceph_con_get(con);
++ con->in_msg->con = con->ops->get(con);
+ BUG_ON(con->in_msg->con == NULL);
+ }
+ if (skip)
+@@ -2662,7 +2662,7 @@ static bool ceph_con_in_msg_alloc(struct
+ type, front_len);
+ return false;
+ }
+- con->in_msg->con = ceph_con_get(con);
++ con->in_msg->con = con->ops->get(con);
+ BUG_ON(con->in_msg->con == NULL);
+ con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
+ }
--- /dev/null
+From ecf2281ca8547a7a45fcb2a9f9010219f96899d3 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Thu, 21 Jun 2012 12:49:23 -0700
+Subject: libceph: drop ceph_con_get/put helpers and nref member
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit d59315ca8c0de00df9b363f94a2641a30961ca1c)
+
+These are no longer used. Every ceph_connection instance is embedded in
+another structure, and refcounts are manipulated via the get/put ops.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h | 1 -
+ net/ceph/messenger.c | 28 +---------------------------
+ 2 files changed, 1 insertion(+), 28 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -135,7 +135,6 @@ struct ceph_msg_pos {
+ */
+ struct ceph_connection {
+ void *private;
+- atomic_t nref;
+
+ const struct ceph_connection_operations *ops;
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -501,30 +501,6 @@ bool ceph_con_opened(struct ceph_connect
+ }
+
+ /*
+- * generic get/put
+- */
+-struct ceph_connection *ceph_con_get(struct ceph_connection *con)
+-{
+- int nref = __atomic_add_unless(&con->nref, 1, 0);
+-
+- dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1);
+-
+- return nref ? con : NULL;
+-}
+-
+-void ceph_con_put(struct ceph_connection *con)
+-{
+- int nref = atomic_dec_return(&con->nref);
+-
+- BUG_ON(nref < 0);
+- if (nref == 0) {
+- BUG_ON(con->sock);
+- kfree(con);
+- }
+- dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref);
+-}
+-
+-/*
+ * initialize a new connection.
+ */
+ void ceph_con_init(struct ceph_connection *con, void *private,
+@@ -535,7 +511,6 @@ void ceph_con_init(struct ceph_connectio
+ memset(con, 0, sizeof(*con));
+ con->private = private;
+ con->ops = ops;
+- atomic_set(&con->nref, 1);
+ con->msgr = msgr;
+
+ con_sock_state_init(con);
+@@ -1951,8 +1926,7 @@ static int try_write(struct ceph_connect
+ {
+ int ret = 1;
+
+- dout("try_write start %p state %lu nref %d\n", con, con->state,
+- atomic_read(&con->nref));
++ dout("try_write start %p state %lu\n", con, con->state);
+
+ more:
+ dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
--- /dev/null
+From 848f392c4eb0a63a35a2a53234cf15340e9cb0fd Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: encapsulate out message data setup
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 739c905baa018c99003564ebc367d93aa44d4861)
+
+Move the code that prepares to write the data portion of a message
+into its own function.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 37 +++++++++++++++++++++++--------------
+ 1 file changed, 23 insertions(+), 14 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -565,6 +565,24 @@ static void con_out_kvec_add(struct ceph
+ con->out_kvec_bytes += size;
+ }
+
++static void prepare_write_message_data(struct ceph_connection *con)
++{
++ struct ceph_msg *msg = con->out_msg;
++
++ BUG_ON(!msg);
++ BUG_ON(!msg->hdr.data_len);
++
++ /* initialize page iterator */
++ con->out_msg_pos.page = 0;
++ if (msg->pages)
++ con->out_msg_pos.page_pos = msg->page_alignment;
++ else
++ con->out_msg_pos.page_pos = 0;
++ con->out_msg_pos.data_pos = 0;
++ con->out_msg_pos.did_page_crc = false;
++ con->out_more = 1; /* data + footer will follow */
++}
++
+ /*
+ * Prepare footer for currently outgoing message, and finish things
+ * off. Assumes out_kvec* are already valid.. we just add on to the end.
+@@ -657,26 +675,17 @@ static void prepare_write_message(struct
+ con->out_msg->footer.middle_crc = cpu_to_le32(crc);
+ } else
+ con->out_msg->footer.middle_crc = 0;
+- con->out_msg->footer.data_crc = 0;
+- dout("prepare_write_message front_crc %u data_crc %u\n",
++ dout("%s front_crc %u middle_crc %u\n", __func__,
+ le32_to_cpu(con->out_msg->footer.front_crc),
+ le32_to_cpu(con->out_msg->footer.middle_crc));
+
+ /* is there a data payload? */
+- if (le32_to_cpu(m->hdr.data_len) > 0) {
+- /* initialize page iterator */
+- con->out_msg_pos.page = 0;
+- if (m->pages)
+- con->out_msg_pos.page_pos = m->page_alignment;
+- else
+- con->out_msg_pos.page_pos = 0;
+- con->out_msg_pos.data_pos = 0;
+- con->out_msg_pos.did_page_crc = false;
+- con->out_more = 1; /* data + footer will follow */
+- } else {
++ con->out_msg->footer.data_crc = 0;
++ if (m->hdr.data_len)
++ prepare_write_message_data(con);
++ else
+ /* no, queue up footer too and be done */
+ prepare_write_message_footer(con);
+- }
+
+ set_bit(WRITE_PENDING, &con->flags);
+ }
--- /dev/null
+From 1a8d8ec0fb2260e062574c95480e6d74b7d19181 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: encapsulate advancing msg page
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 84ca8fc87fcf4ab97bb8acdb59bf97bb4820cb14)
+
+In write_partial_msg_pages(), once all the data from a page has been
+sent we advance to the next one. Put the code that takes care of
+this into its own function.
+
+While modifying write_partial_msg_pages(), make its local variable
+"in_trail" be Boolean, and use the local variable "msg" (which is
+just the connection's current out_msg pointer) consistently.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 58 +++++++++++++++++++++++++++++----------------------
+ 1 file changed, 34 insertions(+), 24 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -891,6 +891,33 @@ static void iter_bio_next(struct bio **b
+ }
+ #endif
+
++static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
++ size_t len, size_t sent, bool in_trail)
++{
++ struct ceph_msg *msg = con->out_msg;
++
++ BUG_ON(!msg);
++ BUG_ON(!sent);
++
++ con->out_msg_pos.data_pos += sent;
++ con->out_msg_pos.page_pos += sent;
++ if (sent == len) {
++ con->out_msg_pos.page_pos = 0;
++ con->out_msg_pos.page++;
++ con->out_msg_pos.did_page_crc = false;
++ if (in_trail)
++ list_move_tail(&page->lru,
++ &msg->trail->head);
++ else if (msg->pagelist)
++ list_move_tail(&page->lru,
++ &msg->pagelist->head);
++#ifdef CONFIG_BLOCK
++ else if (msg->bio)
++ iter_bio_next(&msg->bio_iter, &msg->bio_seg);
++#endif
++ }
++}
++
+ /*
+ * Write as much message data payload as we can. If we finish, queue
+ * up the footer.
+@@ -906,11 +933,11 @@ static int write_partial_msg_pages(struc
+ bool do_datacrc = !con->msgr->nocrc;
+ int ret;
+ int total_max_write;
+- int in_trail = 0;
++ bool in_trail = false;
+ size_t trail_len = (msg->trail ? msg->trail->length : 0);
+
+ dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
+- con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages,
++ con, msg, con->out_msg_pos.page, msg->nr_pages,
+ con->out_msg_pos.page_pos);
+
+ #ifdef CONFIG_BLOCK
+@@ -934,13 +961,12 @@ static int write_partial_msg_pages(struc
+
+ /* have we reached the trail part of the data? */
+ if (con->out_msg_pos.data_pos >= data_len - trail_len) {
+- in_trail = 1;
++ in_trail = true;
+
+ total_max_write = data_len - con->out_msg_pos.data_pos;
+
+ page = list_first_entry(&msg->trail->head,
+ struct page, lru);
+- max_write = PAGE_SIZE;
+ } else if (msg->pages) {
+ page = msg->pages[con->out_msg_pos.page];
+ } else if (msg->pagelist) {
+@@ -964,14 +990,14 @@ static int write_partial_msg_pages(struc
+ if (do_datacrc && !con->out_msg_pos.did_page_crc) {
+ void *base;
+ u32 crc;
+- u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc);
++ u32 tmpcrc = le32_to_cpu(msg->footer.data_crc);
+ char *kaddr;
+
+ kaddr = kmap(page);
+ BUG_ON(kaddr == NULL);
+ base = kaddr + con->out_msg_pos.page_pos + bio_offset;
+ crc = crc32c(tmpcrc, base, len);
+- con->out_msg->footer.data_crc = cpu_to_le32(crc);
++ msg->footer.data_crc = cpu_to_le32(crc);
+ con->out_msg_pos.did_page_crc = true;
+ }
+ ret = ceph_tcp_sendpage(con->sock, page,
+@@ -984,30 +1010,14 @@ static int write_partial_msg_pages(struc
+ if (ret <= 0)
+ goto out;
+
+- con->out_msg_pos.data_pos += ret;
+- con->out_msg_pos.page_pos += ret;
+- if (ret == len) {
+- con->out_msg_pos.page_pos = 0;
+- con->out_msg_pos.page++;
+- con->out_msg_pos.did_page_crc = false;
+- if (in_trail)
+- list_move_tail(&page->lru,
+- &msg->trail->head);
+- else if (msg->pagelist)
+- list_move_tail(&page->lru,
+- &msg->pagelist->head);
+-#ifdef CONFIG_BLOCK
+- else if (msg->bio)
+- iter_bio_next(&msg->bio_iter, &msg->bio_seg);
+-#endif
+- }
++ out_msg_pos_next(con, page, len, (size_t) ret, in_trail);
+ }
+
+ dout("write_partial_msg_pages %p msg %p done\n", con, msg);
+
+ /* prepare and queue up footer, too */
+ if (!do_datacrc)
+- con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
++ msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
+ con_out_kvec_reset(con);
+ prepare_write_message_footer(con);
+ ret = 1;
--- /dev/null
+From 7ee905ae64c2bb36f281ff8b7579b8a3e249f470 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: don't mark footer complete before it is
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit fd154f3c75465abd83b7a395033e3755908a1e6e)
+
+This is a nit, but prepare_write_message() sets the FOOTER_COMPLETE
+flag before the CRC for the data portion (recorded in the footer)
+has been completely computed. Hold off setting the complete flag
+until we've decided it's ready to send.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -592,6 +592,8 @@ static void prepare_write_message_footer
+ struct ceph_msg *m = con->out_msg;
+ int v = con->out_kvec_left;
+
++ m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
++
+ dout("prepare_write_message_footer %p\n", con);
+ con->out_kvec_is_msg = true;
+ con->out_kvec[v].iov_base = &m->footer;
+@@ -665,7 +667,7 @@ static void prepare_write_message(struct
+ /* fill in crc (except data pages), footer */
+ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
+ con->out_msg->hdr.crc = cpu_to_le32(crc);
+- con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE;
++ con->out_msg->footer.flags = 0;
+
+ crc = crc32c(0, m->front.iov_base, m->front.iov_len);
+ con->out_msg->footer.front_crc = cpu_to_le32(crc);
--- /dev/null
+From ee8eaab277235ddcaaacf1497a06a6b47bce8cc5 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: move init_bio_*() functions up
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit df6ad1f97342ebc4270128222e896541405eecdb)
+
+Move init_bio_iter() and iter_bio_next() up in their source file so
+they'll be defined before they're needed.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 50 +++++++++++++++++++++++++-------------------------
+ 1 file changed, 25 insertions(+), 25 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -565,6 +565,31 @@ static void con_out_kvec_add(struct ceph
+ con->out_kvec_bytes += size;
+ }
+
++#ifdef CONFIG_BLOCK
++static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
++{
++ if (!bio) {
++ *iter = NULL;
++ *seg = 0;
++ return;
++ }
++ *iter = bio;
++ *seg = bio->bi_idx;
++}
++
++static void iter_bio_next(struct bio **bio_iter, int *seg)
++{
++ if (*bio_iter == NULL)
++ return;
++
++ BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
++
++ (*seg)++;
++ if (*seg == (*bio_iter)->bi_vcnt)
++ init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
++}
++#endif
++
+ static void prepare_write_message_data(struct ceph_connection *con)
+ {
+ struct ceph_msg *msg = con->out_msg;
+@@ -868,31 +893,6 @@ out:
+ return ret; /* done! */
+ }
+
+-#ifdef CONFIG_BLOCK
+-static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
+-{
+- if (!bio) {
+- *iter = NULL;
+- *seg = 0;
+- return;
+- }
+- *iter = bio;
+- *seg = bio->bi_idx;
+-}
+-
+-static void iter_bio_next(struct bio **bio_iter, int *seg)
+-{
+- if (*bio_iter == NULL)
+- return;
+-
+- BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
+-
+- (*seg)++;
+- if (*seg == (*bio_iter)->bi_vcnt)
+- init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
+-}
+-#endif
+-
+ static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
+ size_t len, size_t sent, bool in_trail)
+ {
--- /dev/null
+From c7d4fdc52c6730b8c1f1c90637154152d5bec61c Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: move init of bio_iter
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 572c588edadaa3da3992bd8a0fed830bbcc861f8)
+
+If a message has a non-null bio pointer, its bio_iter field is
+initialized in write_partial_msg_pages() if this has not been done
+already. This is really a one-time setup operation for sending a
+message's (bio) data, so move that initialization code into
+prepare_write_message_data() which serves that purpose.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -603,6 +603,10 @@ static void prepare_write_message_data(s
+ con->out_msg_pos.page_pos = msg->page_alignment;
+ else
+ con->out_msg_pos.page_pos = 0;
++#ifdef CONFIG_BLOCK
++ if (msg->bio && !msg->bio_iter)
++ init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
++#endif
+ con->out_msg_pos.data_pos = 0;
+ con->out_msg_pos.did_page_crc = false;
+ con->out_more = 1; /* data + footer will follow */
+@@ -942,11 +946,6 @@ static int write_partial_msg_pages(struc
+ con, msg, con->out_msg_pos.page, msg->nr_pages,
+ con->out_msg_pos.page_pos);
+
+-#ifdef CONFIG_BLOCK
+- if (msg->bio && !msg->bio_iter)
+- init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
+-#endif
+-
+ while (data_len > con->out_msg_pos.data_pos) {
+ struct page *page = NULL;
+ int max_write = PAGE_SIZE;
--- /dev/null
+From 3366182e1290e0a4ad8d4ee5ac06388994b6f05a Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: don't use bio_iter as a flag
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit abdaa6a849af1d63153682c11f5bbb22dacb1f6b)
+
+Recently a bug was fixed in which the bio_iter field in a ceph
+message was not being properly re-initialized when a message got
+re-transmitted:
+ commit 43643528cce60ca184fe8197efa8e8da7c89a037
+ Author: Yan, Zheng <zheng.z.yan@intel.com>
+ rbd: Clear ceph_msg->bio_iter for retransmitted message
+
+We are now only initializing the bio_iter field when we are about to
+start to write message data (in prepare_write_message_data()),
+rather than every time we are attempting to write any portion of the
+message data (in write_partial_msg_pages()). This means we no
+longer need to use the msg->bio_iter field as a flag.
+
+So just don't do that any more. Trust prepare_write_message_data()
+to ensure msg->bio_iter is properly initialized, every time we are
+about to begin writing (or re-writing) a message's bio data.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -604,7 +604,7 @@ static void prepare_write_message_data(s
+ else
+ con->out_msg_pos.page_pos = 0;
+ #ifdef CONFIG_BLOCK
+- if (msg->bio && !msg->bio_iter)
++ if (msg->bio)
+ init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
+ #endif
+ con->out_msg_pos.data_pos = 0;
+@@ -672,10 +672,6 @@ static void prepare_write_message(struct
+ m->hdr.seq = cpu_to_le64(++con->out_seq);
+ m->needs_out_seq = false;
+ }
+-#ifdef CONFIG_BLOCK
+- else
+- m->bio_iter = NULL;
+-#endif
+
+ dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
+ m, con->out_seq, le16_to_cpu(m->hdr.type),
--- /dev/null
+From 2ced3e12ab2e6342d911685ed500c8a5497da195 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 20 Jun 2012 21:53:53 -0500
+Subject: libceph: SOCK_CLOSED is a flag, not a state
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit a8d00e3cdef4c1c4f194414b72b24cd995439a05)
+
+The following commit changed it so SOCK_CLOSED bit was stored in
+a connection's new "flags" field rather than its "state" field.
+
+ libceph: start separating connection flags from state
+ commit 928443cd
+
+That bit is used in con_close_socket() to protect against setting an
+error message more than once in the socket event handler function.
+
+Unfortunately, the field being operated on in that function was not
+updated to be "flags" as it should have been. This fixes that
+error.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -397,11 +397,11 @@ static int con_close_socket(struct ceph_
+ dout("con_close_socket on %p sock %p\n", con, con->sock);
+ if (!con->sock)
+ return 0;
+- set_bit(SOCK_CLOSED, &con->state);
++ set_bit(SOCK_CLOSED, &con->flags);
+ rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
+ sock_release(con->sock);
+ con->sock = NULL;
+- clear_bit(SOCK_CLOSED, &con->state);
++ clear_bit(SOCK_CLOSED, &con->flags);
+ con_sock_state_closed(con);
+ return rc;
+ }
--- /dev/null
+From dceeaf54ac589c72f64f8832e9807becaca99cac Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 20 Jun 2012 21:53:53 -0500
+Subject: libceph: don't change socket state on sock event
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 188048bce311ee41e5178bc3255415d0eae28423)
+
+Currently the socket state change event handler records an error
+message on a connection to distinguish a close while connecting from
+a close while a connection was already established.
+
+Changing connection information during handling of a socket event is
+not very clean, so instead move this assignment inside con_work(),
+where it can be done during normal connection-level processing (and
+under protection of the connection mutex as well).
+
+Move the handling of a socket closed event up to the top of the
+processing loop in con_work(); there's no point in handling backoff
+etc. if we have a newly-closed socket to take care of.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -261,13 +261,8 @@ static void ceph_sock_state_change(struc
+ case TCP_CLOSE_WAIT:
+ dout("%s TCP_CLOSE_WAIT\n", __func__);
+ con_sock_state_closing(con);
+- if (test_and_set_bit(SOCK_CLOSED, &con->flags) == 0) {
+- if (test_bit(CONNECTING, &con->state))
+- con->error_msg = "connection failed";
+- else
+- con->error_msg = "socket closed";
++ if (!test_and_set_bit(SOCK_CLOSED, &con->flags))
+ queue_con(con);
+- }
+ break;
+ case TCP_ESTABLISHED:
+ dout("%s TCP_ESTABLISHED\n", __func__);
+@@ -2187,6 +2182,14 @@ static void con_work(struct work_struct
+
+ mutex_lock(&con->mutex);
+ restart:
++ if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) {
++ if (test_bit(CONNECTING, &con->state))
++ con->error_msg = "connection failed";
++ else
++ con->error_msg = "socket closed";
++ goto fault;
++ }
++
+ if (test_and_clear_bit(BACKOFF, &con->flags)) {
+ dout("con_work %p backing off\n", con);
+ if (queue_delayed_work(ceph_msgr_wq, &con->work,
+@@ -2216,9 +2219,6 @@ restart:
+ con_close_socket(con);
+ }
+
+- if (test_and_clear_bit(SOCK_CLOSED, &con->flags))
+- goto fault;
+-
+ ret = try_read(con);
+ if (ret == -EAGAIN)
+ goto restart;
--- /dev/null
+From 8dce8a3a4ed060800e36545bbabd63e23a459371 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 20 Jun 2012 21:53:53 -0500
+Subject: libceph: just set SOCK_CLOSED when state changes
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit d65c9e0b9eb43d14ece9dd843506ccba06162ee7)
+
+When a TCP_CLOSE or TCP_CLOSE_WAIT event occurs, the SOCK_CLOSED
+connection flag bit is set, and if it had not been previously set
+queue_con() is called to ensure con_work() will get a chance to
+handle the changed state.
+
+con_work() atomically checks--and if set, clears--the SOCK_CLOSED
+bit. This means that even if the bit were set
+repeatedly, the related processing in con_work() only gets called
+once per transition of the bit from 0 to 1.
+
+What's important then is that we ensure con_work() gets called *at
+least* once when a socket close event occurs, not that it gets
+called *exactly* once.
+
+The work queue mechanism already takes care of queueing work
+only if it is not already queued, so there's no need for us
+to call queue_con() conditionally.
+
+So this patch just makes it so the SOCK_CLOSED flag gets set
+unconditionally in ceph_sock_state_change().
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -261,8 +261,8 @@ static void ceph_sock_state_change(struc
+ case TCP_CLOSE_WAIT:
+ dout("%s TCP_CLOSE_WAIT\n", __func__);
+ con_sock_state_closing(con);
+- if (!test_and_set_bit(SOCK_CLOSED, &con->flags))
+- queue_con(con);
++ set_bit(SOCK_CLOSED, &con->flags);
++ queue_con(con);
+ break;
+ case TCP_ESTABLISHED:
+ dout("%s TCP_ESTABLISHED\n", __func__);
--- /dev/null
+From aac3fdffbb4f4822ad58ea99dace727e2fbf3af5 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 20 Jun 2012 21:53:53 -0500
+Subject: libceph: don't touch con state in con_close_socket()
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 456ea46865787283088b23a8a7f69244513b95f0)
+
+In con_close_socket(), a connection's SOCK_CLOSED flag gets set and
+then cleared while its shutdown method is called and its reference
+gets dropped.
+
+Previously, that flag got set only if it had not already been set,
+so setting it in con_close_socket() might have prevented additional
+processing being done on a socket being shut down. We no longer set
+SOCK_CLOSED in the socket event routine conditionally, so setting
+that bit here no longer provides whatever benefit it might have
+provided before.
+
+A race condition could still leave the SOCK_CLOSED bit set even
+after we've issued the call to con_close_socket(), so we still clear
+that bit after shutting the socket down. Add a comment explaining
+the reason for this.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -392,10 +392,16 @@ static int con_close_socket(struct ceph_
+ dout("con_close_socket on %p sock %p\n", con, con->sock);
+ if (!con->sock)
+ return 0;
+- set_bit(SOCK_CLOSED, &con->flags);
+ rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
+ sock_release(con->sock);
+ con->sock = NULL;
++
++ /*
++ * Forcibly clear the SOCK_CLOSE flag. It gets set
++ * independent of the connection mutex, and we could have
++ * received a socket close event before we had the chance to
++ * shut the socket down.
++ */
+ clear_bit(SOCK_CLOSED, &con->flags);
+ con_sock_state_closed(con);
+ return rc;
--- /dev/null
+From 95d4d98b14a94dceb1c9a5e12101bcf76fdb9958 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 20 Jun 2012 21:53:53 -0500
+Subject: libceph: clear CONNECTING in ceph_con_close()
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit bb9e6bba5d8b85b631390f8dbe8a24ae1ff5b48a)
+
+A connection that is closed will no longer be connecting. So
+clear the CONNECTING state bit in ceph_con_close(). Similarly,
+if the socket has been closed we no longer are in connecting
+state (a new connect sequence will need to be initiated).
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -462,6 +462,7 @@ void ceph_con_close(struct ceph_connecti
+ dout("con_close %p peer %s\n", con,
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ clear_bit(NEGOTIATING, &con->state);
++ clear_bit(CONNECTING, &con->state);
+ clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
+ set_bit(CLOSED, &con->state);
+
+@@ -2189,7 +2190,7 @@ static void con_work(struct work_struct
+ mutex_lock(&con->mutex);
+ restart:
+ if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) {
+- if (test_bit(CONNECTING, &con->state))
++ if (test_and_clear_bit(CONNECTING, &con->state))
+ con->error_msg = "connection failed";
+ else
+ con->error_msg = "socket closed";
--- /dev/null
+From 9694fce886b969ab62035eb3cd8648be0f7984be Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 23 May 2012 14:35:23 -0500
+Subject: libceph: clear NEGOTIATING when done
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 3ec50d1868a9e0493046400bb1fdd054c7f64ebd)
+
+A connection state's NEGOTIATING bit gets set while in CONNECTING
+state after we have successfully exchanged a ceph banner and IP
+addresses with the connection's peer (the server). But that bit
+is not cleared again--at least not until another connection attempt
+is initiated.
+
+Instead, clear it as soon as the connection is fully established.
+Also, clear it when a socket connection gets prematurely closed
+in the midst of establishing a ceph connection (in case we had
+reached the point where it was set).
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1562,6 +1562,7 @@ static int process_connect(struct ceph_c
+ fail_protocol(con);
+ return -1;
+ }
++ clear_bit(NEGOTIATING, &con->state);
+ clear_bit(CONNECTING, &con->state);
+ con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
+ con->connect_seq++;
+@@ -1951,7 +1952,6 @@ more:
+
+ /* open the socket first? */
+ if (con->sock == NULL) {
+- clear_bit(NEGOTIATING, &con->state);
+ set_bit(CONNECTING, &con->state);
+
+ con_out_kvec_reset(con);
+@@ -2190,10 +2190,12 @@ static void con_work(struct work_struct
+ mutex_lock(&con->mutex);
+ restart:
+ if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) {
+- if (test_and_clear_bit(CONNECTING, &con->state))
++ if (test_and_clear_bit(CONNECTING, &con->state)) {
++ clear_bit(NEGOTIATING, &con->state);
+ con->error_msg = "connection failed";
+- else
++ } else {
+ con->error_msg = "socket closed";
++ }
+ goto fault;
+ }
+
--- /dev/null
+From ad53e0fbfe4fe70c2434fa1ce9707a3f1eb6f211 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 23 May 2012 14:35:23 -0500
+Subject: libceph: define and use an explicit CONNECTED state
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit e27947c767f5bed15048f4e4dad3e2eb69133697)
+
+There is no state explicitly defined when a ceph connection is fully
+operational. So define one.
+
+It's set when the connection sequence completes successfully, and is
+cleared when the connection gets closed.
+
+Be a little more careful when examining the old state when a socket
+disconnect event is reported.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h | 1 +
+ net/ceph/messenger.c | 9 +++++++--
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -120,6 +120,7 @@ struct ceph_msg_pos {
+ */
+ #define CONNECTING 1
+ #define NEGOTIATING 2
++#define CONNECTED 5
+ #define STANDBY 8 /* no outgoing messages, socket closed. we keep
+ * the ceph_connection around to maintain shared
+ * state with the peer. */
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -463,6 +463,7 @@ void ceph_con_close(struct ceph_connecti
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ clear_bit(NEGOTIATING, &con->state);
+ clear_bit(CONNECTING, &con->state);
++ clear_bit(CONNECTED, &con->state);
+ clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
+ set_bit(CLOSED, &con->state);
+
+@@ -1564,6 +1565,7 @@ static int process_connect(struct ceph_c
+ }
+ clear_bit(NEGOTIATING, &con->state);
+ clear_bit(CONNECTING, &con->state);
++ set_bit(CONNECTED, &con->state);
+ con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
+ con->connect_seq++;
+ con->peer_features = server_feat;
+@@ -2114,6 +2116,7 @@ more:
+ prepare_read_ack(con);
+ break;
+ case CEPH_MSGR_TAG_CLOSE:
++ clear_bit(CONNECTED, &con->state);
+ set_bit(CLOSED, &con->state); /* fixme */
+ goto out;
+ default:
+@@ -2190,11 +2193,13 @@ static void con_work(struct work_struct
+ mutex_lock(&con->mutex);
+ restart:
+ if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) {
+- if (test_and_clear_bit(CONNECTING, &con->state)) {
++ if (test_and_clear_bit(CONNECTED, &con->state))
++ con->error_msg = "socket closed";
++ else if (test_and_clear_bit(CONNECTING, &con->state)) {
+ clear_bit(NEGOTIATING, &con->state);
+ con->error_msg = "connection failed";
+ } else {
+- con->error_msg = "socket closed";
++ con->error_msg = "unrecognized con state";
+ }
+ goto fault;
+ }
--- /dev/null
+From 4e06ab61b13f884af3c1d76bbcd56043efe3fe27 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Thu, 31 May 2012 11:37:29 -0500
+Subject: libceph: separate banner and connect writes
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit ab166d5aa3bc036fba7efaca6e4e43a7e9510acf)
+
+There are two phases in the process of linking together the two ends
+of a ceph connection. The first involves exchanging a banner and
+IP addresses, and if that is successful a second phase exchanges
+some detail about each side's connection capabilities.
+
+When initiating a connection, the client side now queues to send
+its information for both phases of this process at the same time.
+This is probably a bit more efficient, but it is slightly messier
+from a layering perspective in the code.
+
+So rearrange things so that the client doesn't send the connection
+information until it has received and processed the response in the
+initial banner phase (in process_banner()).
+
+Move the code (in the (con->sock == NULL) case in try_write()) that
+prepares for writing the connection information, delaying doing that
+until the banner exchange has completed. Move the code that begins
+the transition to this second "NEGOTIATING" phase out of
+process_banner() and into its caller, so preparing to write the
+connection information and preparing to read the response are
+adjacent to each other.
+
+Finally, preparing to write the connection information now requires
+the output kvec to be reset in all cases, so move that into the
+prepare_write_connect() and delete it from all callers.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 20 +++++++++++---------
+ 1 file changed, 11 insertions(+), 9 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -841,6 +841,7 @@ static int prepare_write_connect(struct
+ con->out_connect.authorizer_len = auth ?
+ cpu_to_le32(auth->authorizer_buf_len) : 0;
+
++ con_out_kvec_reset(con);
+ con_out_kvec_add(con, sizeof (con->out_connect),
+ &con->out_connect);
+ if (auth && auth->authorizer_buf_len)
+@@ -1430,8 +1431,6 @@ static int process_banner(struct ceph_co
+ ceph_pr_addr(&con->msgr->inst.addr.in_addr));
+ }
+
+- set_bit(NEGOTIATING, &con->state);
+- prepare_read_connect(con);
+ return 0;
+ }
+
+@@ -1481,7 +1480,6 @@ static int process_connect(struct ceph_c
+ return -1;
+ }
+ con->auth_retry = 1;
+- con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+@@ -1502,7 +1500,6 @@ static int process_connect(struct ceph_c
+ ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ reset_connection(con);
+- con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+@@ -1528,7 +1525,6 @@ static int process_connect(struct ceph_c
+ le32_to_cpu(con->out_connect.connect_seq),
+ le32_to_cpu(con->in_reply.connect_seq));
+ con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
+- con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+@@ -1545,7 +1541,6 @@ static int process_connect(struct ceph_c
+ le32_to_cpu(con->in_reply.global_seq));
+ get_global_seq(con->msgr,
+ le32_to_cpu(con->in_reply.global_seq));
+- con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+@@ -1958,9 +1953,6 @@ more:
+
+ con_out_kvec_reset(con);
+ prepare_write_banner(con);
+- ret = prepare_write_connect(con);
+- if (ret < 0)
+- goto out;
+ prepare_read_banner(con);
+
+ BUG_ON(con->in_msg);
+@@ -2073,6 +2065,16 @@ more:
+ ret = process_banner(con);
+ if (ret < 0)
+ goto out;
++
++ /* Banner is good, exchange connection info */
++ ret = prepare_write_connect(con);
++ if (ret < 0)
++ goto out;
++ prepare_read_connect(con);
++ set_bit(NEGOTIATING, &con->state);
++
++ /* Send connection info before awaiting response */
++ goto out;
+ }
+ ret = read_partial_connect(con);
+ if (ret <= 0)
--- /dev/null
+From 7a38d7284d9cf4eeda0f96eef1cb14be618da5c0 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Thu, 24 May 2012 11:55:03 -0500
+Subject: libceph: distinguish two phases of connect sequence
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 7593af920baac37752190a0db703d2732bed4a3b)
+
+Currently a ceph connection enters a "CONNECTING" state when it
+begins the process of (re-)connecting with its peer. Once the two
+ends have successfully exchanged their banner and addresses, an
+additional NEGOTIATING bit is set in the ceph connection's state to
+indicate the connection information exchange has begun. The
+CONNECTING bit/state continues to be set during this phase.
+
+Rather than have the CONNECTING state continue while the NEGOTIATING
+bit is set, interpret these two phases as distinct states. In other
+words, when NEGOTIATING is set, clear CONNECTING. That way only
+one of them will be active at a time.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 50 +++++++++++++++++++++++++++-----------------------
+ 1 file changed, 27 insertions(+), 23 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1559,7 +1559,6 @@ static int process_connect(struct ceph_c
+ return -1;
+ }
+ clear_bit(NEGOTIATING, &con->state);
+- clear_bit(CONNECTING, &con->state);
+ set_bit(CONNECTED, &con->state);
+ con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
+ con->connect_seq++;
+@@ -2000,7 +1999,8 @@ more_kvec:
+ }
+
+ do_next:
+- if (!test_bit(CONNECTING, &con->state)) {
++ if (!test_bit(CONNECTING, &con->state) &&
++ !test_bit(NEGOTIATING, &con->state)) {
+ /* is anything else pending? */
+ if (!list_empty(&con->out_queue)) {
+ prepare_write_message(con);
+@@ -2057,25 +2057,29 @@ more:
+ }
+
+ if (test_bit(CONNECTING, &con->state)) {
+- if (!test_bit(NEGOTIATING, &con->state)) {
+- dout("try_read connecting\n");
+- ret = read_partial_banner(con);
+- if (ret <= 0)
+- goto out;
+- ret = process_banner(con);
+- if (ret < 0)
+- goto out;
+-
+- /* Banner is good, exchange connection info */
+- ret = prepare_write_connect(con);
+- if (ret < 0)
+- goto out;
+- prepare_read_connect(con);
+- set_bit(NEGOTIATING, &con->state);
++ dout("try_read connecting\n");
++ ret = read_partial_banner(con);
++ if (ret <= 0)
++ goto out;
++ ret = process_banner(con);
++ if (ret < 0)
++ goto out;
++
++ clear_bit(CONNECTING, &con->state);
++ set_bit(NEGOTIATING, &con->state);
+
+- /* Send connection info before awaiting response */
++ /* Banner is good, exchange connection info */
++ ret = prepare_write_connect(con);
++ if (ret < 0)
+ goto out;
+- }
++ prepare_read_connect(con);
++
++ /* Send connection info before awaiting response */
++ goto out;
++ }
++
++ if (test_bit(NEGOTIATING, &con->state)) {
++ dout("try_read negotiating\n");
+ ret = read_partial_connect(con);
+ if (ret <= 0)
+ goto out;
+@@ -2197,12 +2201,12 @@ restart:
+ if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) {
+ if (test_and_clear_bit(CONNECTED, &con->state))
+ con->error_msg = "socket closed";
+- else if (test_and_clear_bit(CONNECTING, &con->state)) {
+- clear_bit(NEGOTIATING, &con->state);
++ else if (test_and_clear_bit(NEGOTIATING, &con->state))
++ con->error_msg = "negotiation failed";
++ else if (test_and_clear_bit(CONNECTING, &con->state))
+ con->error_msg = "connection failed";
+- } else {
++ else
+ con->error_msg = "unrecognized con state";
+- }
+ goto fault;
+ }
+
--- /dev/null
+From 2203e5cd059ff4aeafebaf4c4efeca850841794e Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: small changes to messenger.c
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 5821bd8ccdf5d17ab2c391c773756538603838c3)
+
+This patch gathers a few small changes in "net/ceph/messenger.c":
+ out_msg_pos_next()
+ - small logic change that mostly affects indentation
+ write_partial_msg_pages().
+ - use a local variable trail_off to represent the offset into
+ a message of the trail portion of the data (if present)
+ - once we are in the trail portion we will always be there, so we
+ don't always need to check against our data position
+ - avoid computing len twice after we've reached the trail
+ - get rid of the variable tmpcrc, which is not needed
+ - trail_off and trail_len never change so mark them const
+ - update some comments
+ read_partial_message_bio()
+ - bio_iovec_idx() will never return an error, so don't bother
+ checking for it
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 63 +++++++++++++++++++++++++--------------------------
+ 1 file changed, 31 insertions(+), 32 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -907,21 +907,23 @@ static void out_msg_pos_next(struct ceph
+
+ con->out_msg_pos.data_pos += sent;
+ con->out_msg_pos.page_pos += sent;
+- if (sent == len) {
+- con->out_msg_pos.page_pos = 0;
+- con->out_msg_pos.page++;
+- con->out_msg_pos.did_page_crc = false;
+- if (in_trail)
+- list_move_tail(&page->lru,
+- &msg->trail->head);
+- else if (msg->pagelist)
+- list_move_tail(&page->lru,
+- &msg->pagelist->head);
++ if (sent < len)
++ return;
++
++ BUG_ON(sent != len);
++ con->out_msg_pos.page_pos = 0;
++ con->out_msg_pos.page++;
++ con->out_msg_pos.did_page_crc = false;
++ if (in_trail)
++ list_move_tail(&page->lru,
++ &msg->trail->head);
++ else if (msg->pagelist)
++ list_move_tail(&page->lru,
++ &msg->pagelist->head);
+ #ifdef CONFIG_BLOCK
+- else if (msg->bio)
+- iter_bio_next(&msg->bio_iter, &msg->bio_seg);
++ else if (msg->bio)
++ iter_bio_next(&msg->bio_iter, &msg->bio_seg);
+ #endif
+- }
+ }
+
+ /*
+@@ -940,30 +942,31 @@ static int write_partial_msg_pages(struc
+ int ret;
+ int total_max_write;
+ bool in_trail = false;
+- size_t trail_len = (msg->trail ? msg->trail->length : 0);
++ const size_t trail_len = (msg->trail ? msg->trail->length : 0);
++ const size_t trail_off = data_len - trail_len;
+
+ dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
+ con, msg, con->out_msg_pos.page, msg->nr_pages,
+ con->out_msg_pos.page_pos);
+
++ /*
++ * Iterate through each page that contains data to be
++ * written, and send as much as possible for each.
++ *
++ * If we are calculating the data crc (the default), we will
++ * need to map the page. If we have no pages, they have
++ * been revoked, so use the zero page.
++ */
+ while (data_len > con->out_msg_pos.data_pos) {
+ struct page *page = NULL;
+ int max_write = PAGE_SIZE;
+ int bio_offset = 0;
+
+- total_max_write = data_len - trail_len -
+- con->out_msg_pos.data_pos;
+-
+- /*
+- * if we are calculating the data crc (the default), we need
+- * to map the page. if our pages[] has been revoked, use the
+- * zero page.
+- */
+-
+- /* have we reached the trail part of the data? */
+- if (con->out_msg_pos.data_pos >= data_len - trail_len) {
+- in_trail = true;
++ in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off;
++ if (!in_trail)
++ total_max_write = trail_off - con->out_msg_pos.data_pos;
+
++ if (in_trail) {
+ total_max_write = data_len - con->out_msg_pos.data_pos;
+
+ page = list_first_entry(&msg->trail->head,
+@@ -990,14 +993,13 @@ static int write_partial_msg_pages(struc
+
+ if (do_datacrc && !con->out_msg_pos.did_page_crc) {
+ void *base;
+- u32 crc;
+- u32 tmpcrc = le32_to_cpu(msg->footer.data_crc);
++ u32 crc = le32_to_cpu(msg->footer.data_crc);
+ char *kaddr;
+
+ kaddr = kmap(page);
+ BUG_ON(kaddr == NULL);
+ base = kaddr + con->out_msg_pos.page_pos + bio_offset;
+- crc = crc32c(tmpcrc, base, len);
++ crc = crc32c(crc, base, len);
+ msg->footer.data_crc = cpu_to_le32(crc);
+ con->out_msg_pos.did_page_crc = true;
+ }
+@@ -1702,9 +1704,6 @@ static int read_partial_message_bio(stru
+ void *p;
+ int ret, left;
+
+- if (IS_ERR(bv))
+- return PTR_ERR(bv);
+-
+ left = min((int)(data_len - con->in_msg_pos.data_pos),
+ (int)(bv->bv_len - con->in_msg_pos.page_pos));
+
--- /dev/null
+From d31a25445536005c730ff305acef54719aac7f23 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 20 Jun 2012 21:53:53 -0500
+Subject: libceph: add some fine ASCII art
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit bc18f4b1c850ab355e38373fbb60fd28568d84b5)
+
+Sage liked the state diagram I put in my commit description so
+I'm putting it in with the code.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c | 42 +++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 41 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -29,7 +29,47 @@
+ * the sender.
+ */
+
+-/* State values for ceph_connection->sock_state; NEW is assumed to be 0 */
++/*
++ * We track the state of the socket on a given connection using
++ * values defined below. The transition to a new socket state is
++ * handled by a function which verifies we aren't coming from an
++ * unexpected state.
++ *
++ * --------
++ * | NEW* | transient initial state
++ * --------
++ * | con_sock_state_init()
++ * v
++ * ----------
++ * | CLOSED | initialized, but no socket (and no
++ * ---------- TCP connection)
++ * ^ \
++ * | \ con_sock_state_connecting()
++ * | ----------------------
++ * | \
++ * + con_sock_state_closed() \
++ * |\ \
++ * | \ \
++ * | ----------- \
++ * | | CLOSING | socket event; \
++ * | ----------- await close \
++ * | ^ |
++ * | | |
++ * | + con_sock_state_closing() |
++ * | / \ |
++ * | / --------------- |
++ * | / \ v
++ * | / --------------
++ * | / -----------------| CONNECTING | socket created, TCP
++ * | | / -------------- connect initiated
++ * | | | con_sock_state_connected()
++ * | | v
++ * -------------
++ * | CONNECTED | TCP connection established
++ * -------------
++ *
++ * State values for ceph_connection->sock_state; NEW is assumed to be 0.
++ */
+
+ #define CON_SOCK_STATE_NEW 0 /* -> CLOSED */
+ #define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */
0057-libceph-fix-overflow-in-osdmap_decode.patch
0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch
0059-libceph-transition-socket-state-prior-to-actual-conn.patch
+0060-libceph-fix-NULL-dereference-in-reset_connection.patch
+0061-libceph-use-con-get-put-methods.patch
+0062-libceph-drop-ceph_con_get-put-helpers-and-nref-membe.patch
+0063-libceph-encapsulate-out-message-data-setup.patch
+0064-libceph-encapsulate-advancing-msg-page.patch
+0065-libceph-don-t-mark-footer-complete-before-it-is.patch
+0066-libceph-move-init_bio_-functions-up.patch
+0067-libceph-move-init-of-bio_iter.patch
+0068-libceph-don-t-use-bio_iter-as-a-flag.patch
+0069-libceph-SOCK_CLOSED-is-a-flag-not-a-state.patch
+0070-libceph-don-t-change-socket-state-on-sock-event.patch
+0071-libceph-just-set-SOCK_CLOSED-when-state-changes.patch
+0072-libceph-don-t-touch-con-state-in-con_close_socket.patch
+0073-libceph-clear-CONNECTING-in-ceph_con_close.patch
+0074-libceph-clear-NEGOTIATING-when-done.patch
+0075-libceph-define-and-use-an-explicit-CONNECTED-state.patch
+0076-libceph-separate-banner-and-connect-writes.patch
+0077-libceph-distinguish-two-phases-of-connect-sequence.patch
+0078-libceph-small-changes-to-messenger.c.patch
+0079-libceph-add-some-fine-ASCII-art.patch