git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 21 Nov 2012 20:08:43 +0000 (12:08 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 21 Nov 2012 20:08:43 +0000 (12:08 -0800)
added patches:
0060-libceph-fix-NULL-dereference-in-reset_connection.patch
0061-libceph-use-con-get-put-methods.patch
0062-libceph-drop-ceph_con_get-put-helpers-and-nref-membe.patch
0063-libceph-encapsulate-out-message-data-setup.patch
0064-libceph-encapsulate-advancing-msg-page.patch
0065-libceph-don-t-mark-footer-complete-before-it-is.patch
0066-libceph-move-init_bio_-functions-up.patch
0067-libceph-move-init-of-bio_iter.patch
0068-libceph-don-t-use-bio_iter-as-a-flag.patch
0069-libceph-SOCK_CLOSED-is-a-flag-not-a-state.patch
0070-libceph-don-t-change-socket-state-on-sock-event.patch
0071-libceph-just-set-SOCK_CLOSED-when-state-changes.patch
0072-libceph-don-t-touch-con-state-in-con_close_socket.patch
0073-libceph-clear-CONNECTING-in-ceph_con_close.patch
0074-libceph-clear-NEGOTIATING-when-done.patch
0075-libceph-define-and-use-an-explicit-CONNECTED-state.patch
0076-libceph-separate-banner-and-connect-writes.patch
0077-libceph-distinguish-two-phases-of-connect-sequence.patch
0078-libceph-small-changes-to-messenger.c.patch
0079-libceph-add-some-fine-ASCII-art.patch

21 files changed:
queue-3.4/0060-libceph-fix-NULL-dereference-in-reset_connection.patch [new file with mode: 0644]
queue-3.4/0061-libceph-use-con-get-put-methods.patch [new file with mode: 0644]
queue-3.4/0062-libceph-drop-ceph_con_get-put-helpers-and-nref-membe.patch [new file with mode: 0644]
queue-3.4/0063-libceph-encapsulate-out-message-data-setup.patch [new file with mode: 0644]
queue-3.4/0064-libceph-encapsulate-advancing-msg-page.patch [new file with mode: 0644]
queue-3.4/0065-libceph-don-t-mark-footer-complete-before-it-is.patch [new file with mode: 0644]
queue-3.4/0066-libceph-move-init_bio_-functions-up.patch [new file with mode: 0644]
queue-3.4/0067-libceph-move-init-of-bio_iter.patch [new file with mode: 0644]
queue-3.4/0068-libceph-don-t-use-bio_iter-as-a-flag.patch [new file with mode: 0644]
queue-3.4/0069-libceph-SOCK_CLOSED-is-a-flag-not-a-state.patch [new file with mode: 0644]
queue-3.4/0070-libceph-don-t-change-socket-state-on-sock-event.patch [new file with mode: 0644]
queue-3.4/0071-libceph-just-set-SOCK_CLOSED-when-state-changes.patch [new file with mode: 0644]
queue-3.4/0072-libceph-don-t-touch-con-state-in-con_close_socket.patch [new file with mode: 0644]
queue-3.4/0073-libceph-clear-CONNECTING-in-ceph_con_close.patch [new file with mode: 0644]
queue-3.4/0074-libceph-clear-NEGOTIATING-when-done.patch [new file with mode: 0644]
queue-3.4/0075-libceph-define-and-use-an-explicit-CONNECTED-state.patch [new file with mode: 0644]
queue-3.4/0076-libceph-separate-banner-and-connect-writes.patch [new file with mode: 0644]
queue-3.4/0077-libceph-distinguish-two-phases-of-connect-sequence.patch [new file with mode: 0644]
queue-3.4/0078-libceph-small-changes-to-messenger.c.patch [new file with mode: 0644]
queue-3.4/0079-libceph-add-some-fine-ASCII-art.patch [new file with mode: 0644]
queue-3.4/series

diff --git a/queue-3.4/0060-libceph-fix-NULL-dereference-in-reset_connection.patch b/queue-3.4/0060-libceph-fix-NULL-dereference-in-reset_connection.patch
new file mode 100644 (file)
index 0000000..2bdd9aa
--- /dev/null
@@ -0,0 +1,29 @@
+From c08138f57dc37880110c91afcb7403f3d7a8aa3c Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Tue, 19 Jun 2012 08:52:33 -0500
+Subject: libceph: fix NULL dereference in reset_connection()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+(cherry picked from commit 26ce171915f348abd1f41da1ed139d93750d987f)
+
+We dereference "con->in_msg" on the line after it was set to NULL.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -440,7 +440,7 @@ static void reset_connection(struct ceph
+               con->in_msg->con = NULL;
+               ceph_msg_put(con->in_msg);
+               con->in_msg = NULL;
+-              ceph_con_put(con->in_msg->con);
++              ceph_con_put(con);
+       }
+       con->connect_seq = 0;
diff --git a/queue-3.4/0061-libceph-use-con-get-put-methods.patch b/queue-3.4/0061-libceph-use-con-get-put-methods.patch
new file mode 100644 (file)
index 0000000..89e7c2c
--- /dev/null
@@ -0,0 +1,94 @@
+From 6df35f27bf01c07b95aa9b93657f5b00b919e231 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Thu, 21 Jun 2012 12:47:08 -0700
+Subject: libceph: use con get/put methods
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 36eb71aa57e6a33d61fd90a2fd87f00c6844bc86)
+
+The ceph_con_get/put() helpers manipulate the embedded con ref
+count, which isn't used now that ceph_connections are embedded in
+other structures.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -414,7 +414,7 @@ static void ceph_msg_remove(struct ceph_
+ {
+       list_del_init(&msg->list_head);
+       BUG_ON(msg->con == NULL);
+-      ceph_con_put(msg->con);
++      msg->con->ops->put(msg->con);
+       msg->con = NULL;
+       ceph_msg_put(msg);
+@@ -440,7 +440,7 @@ static void reset_connection(struct ceph
+               con->in_msg->con = NULL;
+               ceph_msg_put(con->in_msg);
+               con->in_msg = NULL;
+-              ceph_con_put(con);
++              con->ops->put(con);
+       }
+       con->connect_seq = 0;
+@@ -1919,7 +1919,7 @@ static void process_message(struct ceph_
+       con->in_msg->con = NULL;
+       msg = con->in_msg;
+       con->in_msg = NULL;
+-      ceph_con_put(con);
++      con->ops->put(con);
+       /* if first message, set peer_name */
+       if (con->peer_name.type == 0)
+@@ -2281,7 +2281,7 @@ static void ceph_fault(struct ceph_conne
+               con->in_msg->con = NULL;
+               ceph_msg_put(con->in_msg);
+               con->in_msg = NULL;
+-              ceph_con_put(con);
++              con->ops->put(con);
+       }
+       /* Requeue anything that hasn't been acked */
+@@ -2400,7 +2400,7 @@ void ceph_con_send(struct ceph_connectio
+       mutex_lock(&con->mutex);
+       BUG_ON(msg->con != NULL);
+-      msg->con = ceph_con_get(con);
++      msg->con = con->ops->get(con);
+       BUG_ON(msg->con == NULL);
+       BUG_ON(!list_empty(&msg->list_head));
+@@ -2436,7 +2436,7 @@ void ceph_msg_revoke(struct ceph_msg *ms
+               dout("%s %p msg %p - was on queue\n", __func__, con, msg);
+               list_del_init(&msg->list_head);
+               BUG_ON(msg->con == NULL);
+-              ceph_con_put(msg->con);
++              msg->con->ops->put(msg->con);
+               msg->con = NULL;
+               msg->hdr.seq = 0;
+@@ -2646,7 +2646,7 @@ static bool ceph_con_in_msg_alloc(struct
+               con->in_msg = con->ops->alloc_msg(con, hdr, &skip);
+               mutex_lock(&con->mutex);
+               if (con->in_msg) {
+-                      con->in_msg->con = ceph_con_get(con);
++                      con->in_msg->con = con->ops->get(con);
+                       BUG_ON(con->in_msg->con == NULL);
+               }
+               if (skip)
+@@ -2662,7 +2662,7 @@ static bool ceph_con_in_msg_alloc(struct
+                              type, front_len);
+                       return false;
+               }
+-              con->in_msg->con = ceph_con_get(con);
++              con->in_msg->con = con->ops->get(con);
+               BUG_ON(con->in_msg->con == NULL);
+               con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
+       }
diff --git a/queue-3.4/0062-libceph-drop-ceph_con_get-put-helpers-and-nref-membe.patch b/queue-3.4/0062-libceph-drop-ceph_con_get-put-helpers-and-nref-membe.patch
new file mode 100644 (file)
index 0000000..c801c7b
--- /dev/null
@@ -0,0 +1,80 @@
+From ecf2281ca8547a7a45fcb2a9f9010219f96899d3 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Thu, 21 Jun 2012 12:49:23 -0700
+Subject: libceph: drop ceph_con_get/put helpers and nref member
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit d59315ca8c0de00df9b363f94a2641a30961ca1c)
+
+These are no longer used.  Every ceph_connection instance is embedded in
+another structure, and refcounts manipulated via the get/put ops.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h |    1 -
+ net/ceph/messenger.c           |   28 +---------------------------
+ 2 files changed, 1 insertion(+), 28 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -135,7 +135,6 @@ struct ceph_msg_pos {
+  */
+ struct ceph_connection {
+       void *private;
+-      atomic_t nref;
+       const struct ceph_connection_operations *ops;
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -501,30 +501,6 @@ bool ceph_con_opened(struct ceph_connect
+ }
+ /*
+- * generic get/put
+- */
+-struct ceph_connection *ceph_con_get(struct ceph_connection *con)
+-{
+-      int nref = __atomic_add_unless(&con->nref, 1, 0);
+-
+-      dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1);
+-
+-      return nref ? con : NULL;
+-}
+-
+-void ceph_con_put(struct ceph_connection *con)
+-{
+-      int nref = atomic_dec_return(&con->nref);
+-
+-      BUG_ON(nref < 0);
+-      if (nref == 0) {
+-              BUG_ON(con->sock);
+-              kfree(con);
+-      }
+-      dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref);
+-}
+-
+-/*
+  * initialize a new connection.
+  */
+ void ceph_con_init(struct ceph_connection *con, void *private,
+@@ -535,7 +511,6 @@ void ceph_con_init(struct ceph_connectio
+       memset(con, 0, sizeof(*con));
+       con->private = private;
+       con->ops = ops;
+-      atomic_set(&con->nref, 1);
+       con->msgr = msgr;
+       con_sock_state_init(con);
+@@ -1951,8 +1926,7 @@ static int try_write(struct ceph_connect
+ {
+       int ret = 1;
+-      dout("try_write start %p state %lu nref %d\n", con, con->state,
+-           atomic_read(&con->nref));
++      dout("try_write start %p state %lu\n", con, con->state);
+ more:
+       dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
diff --git a/queue-3.4/0063-libceph-encapsulate-out-message-data-setup.patch b/queue-3.4/0063-libceph-encapsulate-out-message-data-setup.patch
new file mode 100644 (file)
index 0000000..dedf108
--- /dev/null
@@ -0,0 +1,78 @@
+From 848f392c4eb0a63a35a2a53234cf15340e9cb0fd Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: encapsulate out message data setup
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 739c905baa018c99003564ebc367d93aa44d4861)
+
+Move the code that prepares to write the data portion of a message
+into its own function.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   37 +++++++++++++++++++++++--------------
+ 1 file changed, 23 insertions(+), 14 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -565,6 +565,24 @@ static void con_out_kvec_add(struct ceph
+       con->out_kvec_bytes += size;
+ }
++static void prepare_write_message_data(struct ceph_connection *con)
++{
++      struct ceph_msg *msg = con->out_msg;
++
++      BUG_ON(!msg);
++      BUG_ON(!msg->hdr.data_len);
++
++      /* initialize page iterator */
++      con->out_msg_pos.page = 0;
++      if (msg->pages)
++              con->out_msg_pos.page_pos = msg->page_alignment;
++      else
++              con->out_msg_pos.page_pos = 0;
++      con->out_msg_pos.data_pos = 0;
++      con->out_msg_pos.did_page_crc = false;
++      con->out_more = 1;  /* data + footer will follow */
++}
++
+ /*
+  * Prepare footer for currently outgoing message, and finish things
+  * off.  Assumes out_kvec* are already valid.. we just add on to the end.
+@@ -657,26 +675,17 @@ static void prepare_write_message(struct
+               con->out_msg->footer.middle_crc = cpu_to_le32(crc);
+       } else
+               con->out_msg->footer.middle_crc = 0;
+-      con->out_msg->footer.data_crc = 0;
+-      dout("prepare_write_message front_crc %u data_crc %u\n",
++      dout("%s front_crc %u middle_crc %u\n", __func__,
+            le32_to_cpu(con->out_msg->footer.front_crc),
+            le32_to_cpu(con->out_msg->footer.middle_crc));
+       /* is there a data payload? */
+-      if (le32_to_cpu(m->hdr.data_len) > 0) {
+-              /* initialize page iterator */
+-              con->out_msg_pos.page = 0;
+-              if (m->pages)
+-                      con->out_msg_pos.page_pos = m->page_alignment;
+-              else
+-                      con->out_msg_pos.page_pos = 0;
+-              con->out_msg_pos.data_pos = 0;
+-              con->out_msg_pos.did_page_crc = false;
+-              con->out_more = 1;  /* data + footer will follow */
+-      } else {
++      con->out_msg->footer.data_crc = 0;
++      if (m->hdr.data_len)
++              prepare_write_message_data(con);
++      else
+               /* no, queue up footer too and be done */
+               prepare_write_message_footer(con);
+-      }
+       set_bit(WRITE_PENDING, &con->flags);
+ }
diff --git a/queue-3.4/0064-libceph-encapsulate-advancing-msg-page.patch b/queue-3.4/0064-libceph-encapsulate-advancing-msg-page.patch
new file mode 100644 (file)
index 0000000..149c56b
--- /dev/null
@@ -0,0 +1,139 @@
+From 1a8d8ec0fb2260e062574c95480e6d74b7d19181 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: encapsulate advancing msg page
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 84ca8fc87fcf4ab97bb8acdb59bf97bb4820cb14)
+
+In write_partial_msg_pages(), once all the data from a page has been
+sent we advance to the next one.  Put the code that takes care of
+this into its own function.
+
+While modifying write_partial_msg_pages(), make its local variable
+"in_trail" be Boolean, and use the local variable "msg" (which is
+just the connection's current out_msg pointer) consistently.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   58 +++++++++++++++++++++++++++++----------------------
+ 1 file changed, 34 insertions(+), 24 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -891,6 +891,33 @@ static void iter_bio_next(struct bio **b
+ }
+ #endif
++static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
++                      size_t len, size_t sent, bool in_trail)
++{
++      struct ceph_msg *msg = con->out_msg;
++
++      BUG_ON(!msg);
++      BUG_ON(!sent);
++
++      con->out_msg_pos.data_pos += sent;
++      con->out_msg_pos.page_pos += sent;
++      if (sent == len) {
++              con->out_msg_pos.page_pos = 0;
++              con->out_msg_pos.page++;
++              con->out_msg_pos.did_page_crc = false;
++              if (in_trail)
++                      list_move_tail(&page->lru,
++                                     &msg->trail->head);
++              else if (msg->pagelist)
++                      list_move_tail(&page->lru,
++                                     &msg->pagelist->head);
++#ifdef CONFIG_BLOCK
++              else if (msg->bio)
++                      iter_bio_next(&msg->bio_iter, &msg->bio_seg);
++#endif
++      }
++}
++
+ /*
+  * Write as much message data payload as we can.  If we finish, queue
+  * up the footer.
+@@ -906,11 +933,11 @@ static int write_partial_msg_pages(struc
+       bool do_datacrc = !con->msgr->nocrc;
+       int ret;
+       int total_max_write;
+-      int in_trail = 0;
++      bool in_trail = false;
+       size_t trail_len = (msg->trail ? msg->trail->length : 0);
+       dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
+-           con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages,
++           con, msg, con->out_msg_pos.page, msg->nr_pages,
+            con->out_msg_pos.page_pos);
+ #ifdef CONFIG_BLOCK
+@@ -934,13 +961,12 @@ static int write_partial_msg_pages(struc
+               /* have we reached the trail part of the data? */
+               if (con->out_msg_pos.data_pos >= data_len - trail_len) {
+-                      in_trail = 1;
++                      in_trail = true;
+                       total_max_write = data_len - con->out_msg_pos.data_pos;
+                       page = list_first_entry(&msg->trail->head,
+                                               struct page, lru);
+-                      max_write = PAGE_SIZE;
+               } else if (msg->pages) {
+                       page = msg->pages[con->out_msg_pos.page];
+               } else if (msg->pagelist) {
+@@ -964,14 +990,14 @@ static int write_partial_msg_pages(struc
+               if (do_datacrc && !con->out_msg_pos.did_page_crc) {
+                       void *base;
+                       u32 crc;
+-                      u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc);
++                      u32 tmpcrc = le32_to_cpu(msg->footer.data_crc);
+                       char *kaddr;
+                       kaddr = kmap(page);
+                       BUG_ON(kaddr == NULL);
+                       base = kaddr + con->out_msg_pos.page_pos + bio_offset;
+                       crc = crc32c(tmpcrc, base, len);
+-                      con->out_msg->footer.data_crc = cpu_to_le32(crc);
++                      msg->footer.data_crc = cpu_to_le32(crc);
+                       con->out_msg_pos.did_page_crc = true;
+               }
+               ret = ceph_tcp_sendpage(con->sock, page,
+@@ -984,30 +1010,14 @@ static int write_partial_msg_pages(struc
+               if (ret <= 0)
+                       goto out;
+-              con->out_msg_pos.data_pos += ret;
+-              con->out_msg_pos.page_pos += ret;
+-              if (ret == len) {
+-                      con->out_msg_pos.page_pos = 0;
+-                      con->out_msg_pos.page++;
+-                      con->out_msg_pos.did_page_crc = false;
+-                      if (in_trail)
+-                              list_move_tail(&page->lru,
+-                                             &msg->trail->head);
+-                      else if (msg->pagelist)
+-                              list_move_tail(&page->lru,
+-                                             &msg->pagelist->head);
+-#ifdef CONFIG_BLOCK
+-                      else if (msg->bio)
+-                              iter_bio_next(&msg->bio_iter, &msg->bio_seg);
+-#endif
+-              }
++              out_msg_pos_next(con, page, len, (size_t) ret, in_trail);
+       }
+       dout("write_partial_msg_pages %p msg %p done\n", con, msg);
+       /* prepare and queue up footer, too */
+       if (!do_datacrc)
+-              con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
++              msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
+       con_out_kvec_reset(con);
+       prepare_write_message_footer(con);
+       ret = 1;
diff --git a/queue-3.4/0065-libceph-don-t-mark-footer-complete-before-it-is.patch b/queue-3.4/0065-libceph-don-t-mark-footer-complete-before-it-is.patch
new file mode 100644 (file)
index 0000000..d16abd0
--- /dev/null
@@ -0,0 +1,41 @@
+From 7ee905ae64c2bb36f281ff8b7579b8a3e249f470 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: don't mark footer complete before it is
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit fd154f3c75465abd83b7a395033e3755908a1e6e)
+
+This is a nit, but prepare_write_message() sets the FOOTER_COMPLETE
+flag before the CRC for the data portion (recorded in the footer)
+has been completely computed.  Hold off setting the complete flag
+until we've decided it's ready to send.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -592,6 +592,8 @@ static void prepare_write_message_footer
+       struct ceph_msg *m = con->out_msg;
+       int v = con->out_kvec_left;
++      m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
++
+       dout("prepare_write_message_footer %p\n", con);
+       con->out_kvec_is_msg = true;
+       con->out_kvec[v].iov_base = &m->footer;
+@@ -665,7 +667,7 @@ static void prepare_write_message(struct
+       /* fill in crc (except data pages), footer */
+       crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
+       con->out_msg->hdr.crc = cpu_to_le32(crc);
+-      con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE;
++      con->out_msg->footer.flags = 0;
+       crc = crc32c(0, m->front.iov_base, m->front.iov_len);
+       con->out_msg->footer.front_crc = cpu_to_le32(crc);
diff --git a/queue-3.4/0066-libceph-move-init_bio_-functions-up.patch b/queue-3.4/0066-libceph-move-init_bio_-functions-up.patch
new file mode 100644 (file)
index 0000000..21f0376
--- /dev/null
@@ -0,0 +1,85 @@
+From ee8eaab277235ddcaaacf1497a06a6b47bce8cc5 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: move init_bio_*() functions up
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit df6ad1f97342ebc4270128222e896541405eecdb)
+
+Move init_bio_iter() and iter_bio_next() up in their source file so
+they'll be defined before they're needed.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   50 +++++++++++++++++++++++++-------------------------
+ 1 file changed, 25 insertions(+), 25 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -565,6 +565,31 @@ static void con_out_kvec_add(struct ceph
+       con->out_kvec_bytes += size;
+ }
++#ifdef CONFIG_BLOCK
++static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
++{
++      if (!bio) {
++              *iter = NULL;
++              *seg = 0;
++              return;
++      }
++      *iter = bio;
++      *seg = bio->bi_idx;
++}
++
++static void iter_bio_next(struct bio **bio_iter, int *seg)
++{
++      if (*bio_iter == NULL)
++              return;
++
++      BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
++
++      (*seg)++;
++      if (*seg == (*bio_iter)->bi_vcnt)
++              init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
++}
++#endif
++
+ static void prepare_write_message_data(struct ceph_connection *con)
+ {
+       struct ceph_msg *msg = con->out_msg;
+@@ -868,31 +893,6 @@ out:
+       return ret;  /* done! */
+ }
+-#ifdef CONFIG_BLOCK
+-static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
+-{
+-      if (!bio) {
+-              *iter = NULL;
+-              *seg = 0;
+-              return;
+-      }
+-      *iter = bio;
+-      *seg = bio->bi_idx;
+-}
+-
+-static void iter_bio_next(struct bio **bio_iter, int *seg)
+-{
+-      if (*bio_iter == NULL)
+-              return;
+-
+-      BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
+-
+-      (*seg)++;
+-      if (*seg == (*bio_iter)->bi_vcnt)
+-              init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
+-}
+-#endif
+-
+ static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
+                       size_t len, size_t sent, bool in_trail)
+ {
diff --git a/queue-3.4/0067-libceph-move-init-of-bio_iter.patch b/queue-3.4/0067-libceph-move-init-of-bio_iter.patch
new file mode 100644 (file)
index 0000000..91b56e7
--- /dev/null
@@ -0,0 +1,47 @@
+From c7d4fdc52c6730b8c1f1c90637154152d5bec61c Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: move init of bio_iter
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 572c588edadaa3da3992bd8a0fed830bbcc861f8)
+
+If a message has a non-null bio pointer, its bio_iter field is
+initialized in write_partial_msg_pages() if this has not been done
+already.  This is really a one-time setup operation for sending a
+message's (bio) data, so move that initialization code into
+prepare_write_message_data() which serves that purpose.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -603,6 +603,10 @@ static void prepare_write_message_data(s
+               con->out_msg_pos.page_pos = msg->page_alignment;
+       else
+               con->out_msg_pos.page_pos = 0;
++#ifdef CONFIG_BLOCK
++      if (msg->bio && !msg->bio_iter)
++              init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
++#endif
+       con->out_msg_pos.data_pos = 0;
+       con->out_msg_pos.did_page_crc = false;
+       con->out_more = 1;  /* data + footer will follow */
+@@ -942,11 +946,6 @@ static int write_partial_msg_pages(struc
+            con, msg, con->out_msg_pos.page, msg->nr_pages,
+            con->out_msg_pos.page_pos);
+-#ifdef CONFIG_BLOCK
+-      if (msg->bio && !msg->bio_iter)
+-              init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
+-#endif
+-
+       while (data_len > con->out_msg_pos.data_pos) {
+               struct page *page = NULL;
+               int max_write = PAGE_SIZE;
diff --git a/queue-3.4/0068-libceph-don-t-use-bio_iter-as-a-flag.patch b/queue-3.4/0068-libceph-don-t-use-bio_iter-as-a-flag.patch
new file mode 100644 (file)
index 0000000..6cab865
--- /dev/null
@@ -0,0 +1,55 @@
+From 3366182e1290e0a4ad8d4ee5ac06388994b6f05a Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: don't use bio_iter as a flag
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit abdaa6a849af1d63153682c11f5bbb22dacb1f6b)
+
+Recently a bug was fixed in which the bio_iter field in a ceph
+message was not being properly re-initialized when a message got
+re-transmitted:
+    commit 43643528cce60ca184fe8197efa8e8da7c89a037
+    Author: Yan, Zheng <zheng.z.yan@intel.com>
+    rbd: Clear ceph_msg->bio_iter for retransmitted message
+
+We are now only initializing the bio_iter field when we are about to
+start to write message data (in prepare_write_message_data()),
+rather than every time we are attempting to write any portion of the
+message data (in write_partial_msg_pages()).  This means we no
+longer need to use the msg->bio_iter field as a flag.
+
+So just don't do that any more.  Trust prepare_write_message_data()
+to ensure msg->bio_iter is properly initialized, every time we are
+about to begin writing (or re-writing) a message's bio data.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -604,7 +604,7 @@ static void prepare_write_message_data(s
+       else
+               con->out_msg_pos.page_pos = 0;
+ #ifdef CONFIG_BLOCK
+-      if (msg->bio && !msg->bio_iter)
++      if (msg->bio)
+               init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
+ #endif
+       con->out_msg_pos.data_pos = 0;
+@@ -672,10 +672,6 @@ static void prepare_write_message(struct
+               m->hdr.seq = cpu_to_le64(++con->out_seq);
+               m->needs_out_seq = false;
+       }
+-#ifdef CONFIG_BLOCK
+-      else
+-              m->bio_iter = NULL;
+-#endif
+       dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
+            m, con->out_seq, le16_to_cpu(m->hdr.type),
diff --git a/queue-3.4/0069-libceph-SOCK_CLOSED-is-a-flag-not-a-state.patch b/queue-3.4/0069-libceph-SOCK_CLOSED-is-a-flag-not-a-state.patch
new file mode 100644 (file)
index 0000000..43ae17a
--- /dev/null
@@ -0,0 +1,45 @@
+From 2ced3e12ab2e6342d911685ed500c8a5497da195 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 20 Jun 2012 21:53:53 -0500
+Subject: libceph: SOCK_CLOSED is a flag, not a state
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit a8d00e3cdef4c1c4f194414b72b24cd995439a05)
+
+The following commit changed it so SOCK_CLOSED bit was stored in
+a connection's new "flags" field rather than its "state" field.
+
+    libceph: start separating connection flags from state
+    commit 928443cd
+
+That bit is used in con_close_socket() to protect against setting an
+error message more than once in the socket event handler function.
+
+Unfortunately, the field being operated on in that function was not
+updated to be "flags" as it should have been.  This fixes that
+error.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -397,11 +397,11 @@ static int con_close_socket(struct ceph_
+       dout("con_close_socket on %p sock %p\n", con, con->sock);
+       if (!con->sock)
+               return 0;
+-      set_bit(SOCK_CLOSED, &con->state);
++      set_bit(SOCK_CLOSED, &con->flags);
+       rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
+       sock_release(con->sock);
+       con->sock = NULL;
+-      clear_bit(SOCK_CLOSED, &con->state);
++      clear_bit(SOCK_CLOSED, &con->flags);
+       con_sock_state_closed(con);
+       return rc;
+ }
diff --git a/queue-3.4/0070-libceph-don-t-change-socket-state-on-sock-event.patch b/queue-3.4/0070-libceph-don-t-change-socket-state-on-sock-event.patch
new file mode 100644 (file)
index 0000000..197b987
--- /dev/null
@@ -0,0 +1,71 @@
+From dceeaf54ac589c72f64f8832e9807becaca99cac Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 20 Jun 2012 21:53:53 -0500
+Subject: libceph: don't change socket state on sock event
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 188048bce311ee41e5178bc3255415d0eae28423)
+
+Currently the socket state change event handler records an error
+message on a connection to distinguish a close while connecting from
+a close while a connection was already established.
+
+Changing connection information during handling of a socket event is
+not very clean, so instead move this assignment inside con_work(),
+where it can be done during normal connection-level processing (and
+under protection of the connection mutex as well).
+
+Move the handling of a socket closed event up to the top of the
+processing loop in con_work(); there's no point in handling backoff
+etc. if we have a newly-closed socket to take care of.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -261,13 +261,8 @@ static void ceph_sock_state_change(struc
+       case TCP_CLOSE_WAIT:
+               dout("%s TCP_CLOSE_WAIT\n", __func__);
+               con_sock_state_closing(con);
+-              if (test_and_set_bit(SOCK_CLOSED, &con->flags) == 0) {
+-                      if (test_bit(CONNECTING, &con->state))
+-                              con->error_msg = "connection failed";
+-                      else
+-                              con->error_msg = "socket closed";
++              if (!test_and_set_bit(SOCK_CLOSED, &con->flags))
+                       queue_con(con);
+-              }
+               break;
+       case TCP_ESTABLISHED:
+               dout("%s TCP_ESTABLISHED\n", __func__);
+@@ -2187,6 +2182,14 @@ static void con_work(struct work_struct
+       mutex_lock(&con->mutex);
+ restart:
++      if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) {
++              if (test_bit(CONNECTING, &con->state))
++                      con->error_msg = "connection failed";
++              else
++                      con->error_msg = "socket closed";
++              goto fault;
++      }
++
+       if (test_and_clear_bit(BACKOFF, &con->flags)) {
+               dout("con_work %p backing off\n", con);
+               if (queue_delayed_work(ceph_msgr_wq, &con->work,
+@@ -2216,9 +2219,6 @@ restart:
+               con_close_socket(con);
+       }
+-      if (test_and_clear_bit(SOCK_CLOSED, &con->flags))
+-              goto fault;
+-
+       ret = try_read(con);
+       if (ret == -EAGAIN)
+               goto restart;
diff --git a/queue-3.4/0071-libceph-just-set-SOCK_CLOSED-when-state-changes.patch b/queue-3.4/0071-libceph-just-set-SOCK_CLOSED-when-state-changes.patch
new file mode 100644 (file)
index 0000000..89bb3ed
--- /dev/null
@@ -0,0 +1,50 @@
+From 8dce8a3a4ed060800e36545bbabd63e23a459371 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 20 Jun 2012 21:53:53 -0500
+Subject: libceph: just set SOCK_CLOSED when state changes
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit d65c9e0b9eb43d14ece9dd843506ccba06162ee7)
+
+When a TCP_CLOSE or TCP_CLOSE_WAIT event occurs, the SOCK_CLOSED
+connection flag bit is set, and if it had not been previously set
+queue_con() is called to ensure con_work() will get a chance to
+handle the changed state.
+
+con_work() atomically checks--and if set, clears--the SOCK_CLOSED
+bit.  This means that even if the bit were set
+repeatedly, the related processing in con_work() only gets called
+once per transition of the bit from 0 to 1.
+
+What's important then is that we ensure con_work() gets called *at
+least* once when a socket close event occurs, not that it gets
+called *exactly* once.
+
+The work queue mechanism already takes care of queueing work
+only if it is not already queued, so there's no need for us
+to call queue_con() conditionally.
+
+So this patch just makes it so the SOCK_CLOSED flag gets set
+unconditionally in ceph_sock_state_change().
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -261,8 +261,8 @@ static void ceph_sock_state_change(struc
+       case TCP_CLOSE_WAIT:
+               dout("%s TCP_CLOSE_WAIT\n", __func__);
+               con_sock_state_closing(con);
+-              if (!test_and_set_bit(SOCK_CLOSED, &con->flags))
+-                      queue_con(con);
++              set_bit(SOCK_CLOSED, &con->flags);
++              queue_con(con);
+               break;
+       case TCP_ESTABLISHED:
+               dout("%s TCP_ESTABLISHED\n", __func__);
diff --git a/queue-3.4/0072-libceph-don-t-touch-con-state-in-con_close_socket.patch b/queue-3.4/0072-libceph-don-t-touch-con-state-in-con_close_socket.patch
new file mode 100644 (file)
index 0000000..2cd338d
--- /dev/null
@@ -0,0 +1,52 @@
+From aac3fdffbb4f4822ad58ea99dace727e2fbf3af5 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 20 Jun 2012 21:53:53 -0500
+Subject: libceph: don't touch con state in con_close_socket()
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 456ea46865787283088b23a8a7f69244513b95f0)
+
+In con_close_socket(), a connection's SOCK_CLOSED flag gets set and
+then cleared while its shutdown method is called and its reference
+gets dropped.
+
+Previously, that flag got set only if it had not already been set,
+so setting it in con_close_socket() might have prevented additional
+processing being done on a socket being shut down.  We no longer set
+SOCK_CLOSED in the socket event routine conditionally, so setting
+that bit here no longer provides whatever benefit it might have
+provided before.
+
+A race condition could still leave the SOCK_CLOSED bit set even
+after we've issued the call to con_close_socket(), so we still clear
+that bit after shutting the socket down.  Add a comment explaining
+the reason for this.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -392,10 +392,16 @@ static int con_close_socket(struct ceph_
+       dout("con_close_socket on %p sock %p\n", con, con->sock);
+       if (!con->sock)
+               return 0;
+-      set_bit(SOCK_CLOSED, &con->flags);
+       rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
+       sock_release(con->sock);
+       con->sock = NULL;
++
++      /*
+       * Forcibly clear the SOCK_CLOSED flag.  It gets set
++       * independent of the connection mutex, and we could have
++       * received a socket close event before we had the chance to
++       * shut the socket down.
++       */
+       clear_bit(SOCK_CLOSED, &con->flags);
+       con_sock_state_closed(con);
+       return rc;
diff --git a/queue-3.4/0073-libceph-clear-CONNECTING-in-ceph_con_close.patch b/queue-3.4/0073-libceph-clear-CONNECTING-in-ceph_con_close.patch
new file mode 100644 (file)
index 0000000..8c12c1e
--- /dev/null
@@ -0,0 +1,40 @@
+From 95d4d98b14a94dceb1c9a5e12101bcf76fdb9958 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 20 Jun 2012 21:53:53 -0500
+Subject: libceph: clear CONNECTING in ceph_con_close()
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit bb9e6bba5d8b85b631390f8dbe8a24ae1ff5b48a)
+
+A connection that is closed will no longer be connecting.  So
+clear the CONNECTING state bit in ceph_con_close().  Similarly,
+if the socket has been closed we no longer are in connecting
+state (a new connect sequence will need to be initiated).
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -462,6 +462,7 @@ void ceph_con_close(struct ceph_connecti
+       dout("con_close %p peer %s\n", con,
+            ceph_pr_addr(&con->peer_addr.in_addr));
+       clear_bit(NEGOTIATING, &con->state);
++      clear_bit(CONNECTING, &con->state);
+       clear_bit(STANDBY, &con->state);  /* avoid connect_seq bump */
+       set_bit(CLOSED, &con->state);
+@@ -2189,7 +2190,7 @@ static void con_work(struct work_struct
+       mutex_lock(&con->mutex);
+ restart:
+       if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) {
+-              if (test_bit(CONNECTING, &con->state))
++              if (test_and_clear_bit(CONNECTING, &con->state))
+                       con->error_msg = "connection failed";
+               else
+                       con->error_msg = "socket closed";
diff --git a/queue-3.4/0074-libceph-clear-NEGOTIATING-when-done.patch b/queue-3.4/0074-libceph-clear-NEGOTIATING-when-done.patch
new file mode 100644 (file)
index 0000000..3ca8edd
--- /dev/null
@@ -0,0 +1,60 @@
+From 9694fce886b969ab62035eb3cd8648be0f7984be Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 23 May 2012 14:35:23 -0500
+Subject: libceph: clear NEGOTIATING when done
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 3ec50d1868a9e0493046400bb1fdd054c7f64ebd)
+
+A connection state's NEGOTIATING bit gets set while in CONNECTING
+state after we have successfully exchanged a ceph banner and IP
+addresses with the connection's peer (the server).  But that bit
+is not cleared again--at least not until another connection attempt
+is initiated.
+
+Instead, clear it as soon as the connection is fully established.
+Also, clear it when a socket connection gets prematurely closed
+in the midst of establishing a ceph connection (in case we had
+reached the point where it was set).
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1562,6 +1562,7 @@ static int process_connect(struct ceph_c
+                       fail_protocol(con);
+                       return -1;
+               }
++              clear_bit(NEGOTIATING, &con->state);
+               clear_bit(CONNECTING, &con->state);
+               con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
+               con->connect_seq++;
+@@ -1951,7 +1952,6 @@ more:
+       /* open the socket first? */
+       if (con->sock == NULL) {
+-              clear_bit(NEGOTIATING, &con->state);
+               set_bit(CONNECTING, &con->state);
+               con_out_kvec_reset(con);
+@@ -2190,10 +2190,12 @@ static void con_work(struct work_struct
+       mutex_lock(&con->mutex);
+ restart:
+       if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) {
+-              if (test_and_clear_bit(CONNECTING, &con->state))
++              if (test_and_clear_bit(CONNECTING, &con->state)) {
++                      clear_bit(NEGOTIATING, &con->state);
+                       con->error_msg = "connection failed";
+-              else
++              } else {
+                       con->error_msg = "socket closed";
++              }
+               goto fault;
+       }
diff --git a/queue-3.4/0075-libceph-define-and-use-an-explicit-CONNECTED-state.patch b/queue-3.4/0075-libceph-define-and-use-an-explicit-CONNECTED-state.patch
new file mode 100644 (file)
index 0000000..79bfdf1
--- /dev/null
@@ -0,0 +1,78 @@
+From ad53e0fbfe4fe70c2434fa1ce9707a3f1eb6f211 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 23 May 2012 14:35:23 -0500
+Subject: libceph: define and use an explicit CONNECTED state
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit e27947c767f5bed15048f4e4dad3e2eb69133697)
+
+There is no state explicitly defined when a ceph connection is fully
+operational.  So define one.
+
+It's set when the connection sequence completes successfully, and is
+cleared when the connection gets closed.
+
+Be a little more careful when examining the old state when a socket
+disconnect event is reported.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h |    1 +
+ net/ceph/messenger.c           |    9 +++++++--
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -120,6 +120,7 @@ struct ceph_msg_pos {
+  */
+ #define CONNECTING    1
+ #define NEGOTIATING   2
++#define CONNECTED     5
+ #define STANDBY               8  /* no outgoing messages, socket closed.  we keep
+                           * the ceph_connection around to maintain shared
+                           * state with the peer. */
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -463,6 +463,7 @@ void ceph_con_close(struct ceph_connecti
+            ceph_pr_addr(&con->peer_addr.in_addr));
+       clear_bit(NEGOTIATING, &con->state);
+       clear_bit(CONNECTING, &con->state);
++      clear_bit(CONNECTED, &con->state);
+       clear_bit(STANDBY, &con->state);  /* avoid connect_seq bump */
+       set_bit(CLOSED, &con->state);
+@@ -1564,6 +1565,7 @@ static int process_connect(struct ceph_c
+               }
+               clear_bit(NEGOTIATING, &con->state);
+               clear_bit(CONNECTING, &con->state);
++              set_bit(CONNECTED, &con->state);
+               con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
+               con->connect_seq++;
+               con->peer_features = server_feat;
+@@ -2114,6 +2116,7 @@ more:
+                       prepare_read_ack(con);
+                       break;
+               case CEPH_MSGR_TAG_CLOSE:
++                      clear_bit(CONNECTED, &con->state);
+                       set_bit(CLOSED, &con->state);   /* fixme */
+                       goto out;
+               default:
+@@ -2190,11 +2193,13 @@ static void con_work(struct work_struct
+       mutex_lock(&con->mutex);
+ restart:
+       if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) {
+-              if (test_and_clear_bit(CONNECTING, &con->state)) {
++              if (test_and_clear_bit(CONNECTED, &con->state))
++                      con->error_msg = "socket closed";
++              else if (test_and_clear_bit(CONNECTING, &con->state)) {
+                       clear_bit(NEGOTIATING, &con->state);
+                       con->error_msg = "connection failed";
+               } else {
+-                      con->error_msg = "socket closed";
++                      con->error_msg = "unrecognized con state";
+               }
+               goto fault;
+       }
diff --git a/queue-3.4/0076-libceph-separate-banner-and-connect-writes.patch b/queue-3.4/0076-libceph-separate-banner-and-connect-writes.patch
new file mode 100644 (file)
index 0000000..e9e29bf
--- /dev/null
@@ -0,0 +1,120 @@
+From 4e06ab61b13f884af3c1d76bbcd56043efe3fe27 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Thu, 31 May 2012 11:37:29 -0500
+Subject: libceph: separate banner and connect writes
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit ab166d5aa3bc036fba7efaca6e4e43a7e9510acf)
+
+There are two phases in the process of linking together the two ends
+of a ceph connection.  The first involves exchanging a banner and
+IP addresses, and if that is successful a second phase exchanges
+some detail about each side's connection capabilities.
+
+When initiating a connection, the client side now queues to send
+its information for both phases of this process at the same time.
+This is probably a bit more efficient, but it is slightly messier
+from a layering perspective in the code.
+
+So rearrange things so that the client doesn't send the connection
+information until it has received and processed the response in the
+initial banner phase (in process_banner()).
+
+Move the code (in the (con->sock == NULL) case in try_write()) that
+prepares for writing the connection information, delaying doing that
+until the banner exchange has completed.  Move the code that begins
+the transition to this second "NEGOTIATING" phase out of
+process_banner() and into its caller, so preparing to write the
+connection information and preparing to read the response are
+adjacent to each other.
+
+Finally, preparing to write the connection information now requires
+the output kvec to be reset in all cases, so move that into
+prepare_write_connect() and delete it from all callers.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   20 +++++++++++---------
+ 1 file changed, 11 insertions(+), 9 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -841,6 +841,7 @@ static int prepare_write_connect(struct
+       con->out_connect.authorizer_len = auth ?
+               cpu_to_le32(auth->authorizer_buf_len) : 0;
++      con_out_kvec_reset(con);
+       con_out_kvec_add(con, sizeof (con->out_connect),
+                                       &con->out_connect);
+       if (auth && auth->authorizer_buf_len)
+@@ -1430,8 +1431,6 @@ static int process_banner(struct ceph_co
+                    ceph_pr_addr(&con->msgr->inst.addr.in_addr));
+       }
+-      set_bit(NEGOTIATING, &con->state);
+-      prepare_read_connect(con);
+       return 0;
+ }
+@@ -1481,7 +1480,6 @@ static int process_connect(struct ceph_c
+                       return -1;
+               }
+               con->auth_retry = 1;
+-              con_out_kvec_reset(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       return ret;
+@@ -1502,7 +1500,6 @@ static int process_connect(struct ceph_c
+                      ENTITY_NAME(con->peer_name),
+                      ceph_pr_addr(&con->peer_addr.in_addr));
+               reset_connection(con);
+-              con_out_kvec_reset(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       return ret;
+@@ -1528,7 +1525,6 @@ static int process_connect(struct ceph_c
+                    le32_to_cpu(con->out_connect.connect_seq),
+                    le32_to_cpu(con->in_reply.connect_seq));
+               con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
+-              con_out_kvec_reset(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       return ret;
+@@ -1545,7 +1541,6 @@ static int process_connect(struct ceph_c
+                    le32_to_cpu(con->in_reply.global_seq));
+               get_global_seq(con->msgr,
+                              le32_to_cpu(con->in_reply.global_seq));
+-              con_out_kvec_reset(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       return ret;
+@@ -1958,9 +1953,6 @@ more:
+               con_out_kvec_reset(con);
+               prepare_write_banner(con);
+-              ret = prepare_write_connect(con);
+-              if (ret < 0)
+-                      goto out;
+               prepare_read_banner(con);
+               BUG_ON(con->in_msg);
+@@ -2073,6 +2065,16 @@ more:
+                       ret = process_banner(con);
+                       if (ret < 0)
+                               goto out;
++
++                      /* Banner is good, exchange connection info */
++                      ret = prepare_write_connect(con);
++                      if (ret < 0)
++                              goto out;
++                      prepare_read_connect(con);
++                      set_bit(NEGOTIATING, &con->state);
++
++                      /* Send connection info before awaiting response */
++                      goto out;
+               }
+               ret = read_partial_connect(con);
+               if (ret <= 0)
diff --git a/queue-3.4/0077-libceph-distinguish-two-phases-of-connect-sequence.patch b/queue-3.4/0077-libceph-distinguish-two-phases-of-connect-sequence.patch
new file mode 100644 (file)
index 0000000..778b4f0
--- /dev/null
@@ -0,0 +1,112 @@
+From 7a38d7284d9cf4eeda0f96eef1cb14be618da5c0 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Thu, 24 May 2012 11:55:03 -0500
+Subject: libceph: distinguish two phases of connect sequence
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 7593af920baac37752190a0db703d2732bed4a3b)
+
+Currently a ceph connection enters a "CONNECTING" state when it
+begins the process of (re-)connecting with its peer.  Once the two
+ends have successfully exchanged their banner and addresses, an
+additional NEGOTIATING bit is set in the ceph connection's state to
+indicate the connection information exchange has begun.  The
+CONNECTING bit/state continues to be set during this phase.
+
+Rather than have the CONNECTING state continue while the NEGOTIATING
+bit is set, interpret these two phases as distinct states.  In other
+words, when NEGOTIATING is set, clear CONNECTING.  That way only
+one of them will be active at a time.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   50 +++++++++++++++++++++++++++-----------------------
+ 1 file changed, 27 insertions(+), 23 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1559,7 +1559,6 @@ static int process_connect(struct ceph_c
+                       return -1;
+               }
+               clear_bit(NEGOTIATING, &con->state);
+-              clear_bit(CONNECTING, &con->state);
+               set_bit(CONNECTED, &con->state);
+               con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
+               con->connect_seq++;
+@@ -2000,7 +1999,8 @@ more_kvec:
+       }
+ do_next:
+-      if (!test_bit(CONNECTING, &con->state)) {
++      if (!test_bit(CONNECTING, &con->state) &&
++                      !test_bit(NEGOTIATING, &con->state)) {
+               /* is anything else pending? */
+               if (!list_empty(&con->out_queue)) {
+                       prepare_write_message(con);
+@@ -2057,25 +2057,29 @@ more:
+       }
+       if (test_bit(CONNECTING, &con->state)) {
+-              if (!test_bit(NEGOTIATING, &con->state)) {
+-                      dout("try_read connecting\n");
+-                      ret = read_partial_banner(con);
+-                      if (ret <= 0)
+-                              goto out;
+-                      ret = process_banner(con);
+-                      if (ret < 0)
+-                              goto out;
+-
+-                      /* Banner is good, exchange connection info */
+-                      ret = prepare_write_connect(con);
+-                      if (ret < 0)
+-                              goto out;
+-                      prepare_read_connect(con);
+-                      set_bit(NEGOTIATING, &con->state);
++              dout("try_read connecting\n");
++              ret = read_partial_banner(con);
++              if (ret <= 0)
++                      goto out;
++              ret = process_banner(con);
++              if (ret < 0)
++                      goto out;
++
++              clear_bit(CONNECTING, &con->state);
++              set_bit(NEGOTIATING, &con->state);
+-                      /* Send connection info before awaiting response */
++              /* Banner is good, exchange connection info */
++              ret = prepare_write_connect(con);
++              if (ret < 0)
+                       goto out;
+-              }
++              prepare_read_connect(con);
++
++              /* Send connection info before awaiting response */
++              goto out;
++      }
++
++      if (test_bit(NEGOTIATING, &con->state)) {
++              dout("try_read negotiating\n");
+               ret = read_partial_connect(con);
+               if (ret <= 0)
+                       goto out;
+@@ -2197,12 +2201,12 @@ restart:
+       if (test_and_clear_bit(SOCK_CLOSED, &con->flags)) {
+               if (test_and_clear_bit(CONNECTED, &con->state))
+                       con->error_msg = "socket closed";
+-              else if (test_and_clear_bit(CONNECTING, &con->state)) {
+-                      clear_bit(NEGOTIATING, &con->state);
++              else if (test_and_clear_bit(NEGOTIATING, &con->state))
++                      con->error_msg = "negotiation failed";
++              else if (test_and_clear_bit(CONNECTING, &con->state))
+                       con->error_msg = "connection failed";
+-              } else {
++              else
+                       con->error_msg = "unrecognized con state";
+-              }
+               goto fault;
+       }
diff --git a/queue-3.4/0078-libceph-small-changes-to-messenger.c.patch b/queue-3.4/0078-libceph-small-changes-to-messenger.c.patch
new file mode 100644 (file)
index 0000000..eae84a4
--- /dev/null
@@ -0,0 +1,143 @@
+From 2203e5cd059ff4aeafebaf4c4efeca850841794e Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 11 Jun 2012 14:57:13 -0500
+Subject: libceph: small changes to messenger.c
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 5821bd8ccdf5d17ab2c391c773756538603838c3)
+
+This patch gathers a few small changes in "net/ceph/messenger.c":
+  out_msg_pos_next()
+    - small logic change that mostly affects indentation
+  write_partial_msg_pages().
+    - use a local variable trail_off to represent the offset into
+      a message of the trail portion of the data (if present)
+    - once we are in the trail portion we will always be there, so we
+      don't always need to check against our data position
+    - avoid computing len twice after we've reached the trail
+    - get rid of the variable tmpcrc, which is not needed
+    - trail_off and trail_len never change so mark them const
+    - update some comments
+  read_partial_message_bio()
+    - bio_iovec_idx() will never return an error, so don't bother
+      checking for it
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   63 +++++++++++++++++++++++++--------------------------
+ 1 file changed, 31 insertions(+), 32 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -907,21 +907,23 @@ static void out_msg_pos_next(struct ceph
+       con->out_msg_pos.data_pos += sent;
+       con->out_msg_pos.page_pos += sent;
+-      if (sent == len) {
+-              con->out_msg_pos.page_pos = 0;
+-              con->out_msg_pos.page++;
+-              con->out_msg_pos.did_page_crc = false;
+-              if (in_trail)
+-                      list_move_tail(&page->lru,
+-                                     &msg->trail->head);
+-              else if (msg->pagelist)
+-                      list_move_tail(&page->lru,
+-                                     &msg->pagelist->head);
++      if (sent < len)
++              return;
++
++      BUG_ON(sent != len);
++      con->out_msg_pos.page_pos = 0;
++      con->out_msg_pos.page++;
++      con->out_msg_pos.did_page_crc = false;
++      if (in_trail)
++              list_move_tail(&page->lru,
++                             &msg->trail->head);
++      else if (msg->pagelist)
++              list_move_tail(&page->lru,
++                             &msg->pagelist->head);
+ #ifdef CONFIG_BLOCK
+-              else if (msg->bio)
+-                      iter_bio_next(&msg->bio_iter, &msg->bio_seg);
++      else if (msg->bio)
++              iter_bio_next(&msg->bio_iter, &msg->bio_seg);
+ #endif
+-      }
+ }
+ /*
+@@ -940,30 +942,31 @@ static int write_partial_msg_pages(struc
+       int ret;
+       int total_max_write;
+       bool in_trail = false;
+-      size_t trail_len = (msg->trail ? msg->trail->length : 0);
++      const size_t trail_len = (msg->trail ? msg->trail->length : 0);
++      const size_t trail_off = data_len - trail_len;
+       dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
+            con, msg, con->out_msg_pos.page, msg->nr_pages,
+            con->out_msg_pos.page_pos);
++      /*
++       * Iterate through each page that contains data to be
++       * written, and send as much as possible for each.
++       *
++       * If we are calculating the data crc (the default), we will
++       * need to map the page.  If we have no pages, they have
++       * been revoked, so use the zero page.
++       */
+       while (data_len > con->out_msg_pos.data_pos) {
+               struct page *page = NULL;
+               int max_write = PAGE_SIZE;
+               int bio_offset = 0;
+-              total_max_write = data_len - trail_len -
+-                      con->out_msg_pos.data_pos;
+-
+-              /*
+-               * if we are calculating the data crc (the default), we need
+-               * to map the page.  if our pages[] has been revoked, use the
+-               * zero page.
+-               */
+-
+-              /* have we reached the trail part of the data? */
+-              if (con->out_msg_pos.data_pos >= data_len - trail_len) {
+-                      in_trail = true;
++              in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off;
++              if (!in_trail)
++                      total_max_write = trail_off - con->out_msg_pos.data_pos;
++              if (in_trail) {
+                       total_max_write = data_len - con->out_msg_pos.data_pos;
+                       page = list_first_entry(&msg->trail->head,
+@@ -990,14 +993,13 @@ static int write_partial_msg_pages(struc
+               if (do_datacrc && !con->out_msg_pos.did_page_crc) {
+                       void *base;
+-                      u32 crc;
+-                      u32 tmpcrc = le32_to_cpu(msg->footer.data_crc);
++                      u32 crc = le32_to_cpu(msg->footer.data_crc);
+                       char *kaddr;
+                       kaddr = kmap(page);
+                       BUG_ON(kaddr == NULL);
+                       base = kaddr + con->out_msg_pos.page_pos + bio_offset;
+-                      crc = crc32c(tmpcrc, base, len);
++                      crc = crc32c(crc, base, len);
+                       msg->footer.data_crc = cpu_to_le32(crc);
+                       con->out_msg_pos.did_page_crc = true;
+               }
+@@ -1702,9 +1704,6 @@ static int read_partial_message_bio(stru
+       void *p;
+       int ret, left;
+-      if (IS_ERR(bv))
+-              return PTR_ERR(bv);
+-
+       left = min((int)(data_len - con->in_msg_pos.data_pos),
+                  (int)(bv->bv_len - con->in_msg_pos.page_pos));
diff --git a/queue-3.4/0079-libceph-add-some-fine-ASCII-art.patch b/queue-3.4/0079-libceph-add-some-fine-ASCII-art.patch
new file mode 100644 (file)
index 0000000..083d726
--- /dev/null
@@ -0,0 +1,70 @@
+From d31a25445536005c730ff305acef54719aac7f23 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Wed, 20 Jun 2012 21:53:53 -0500
+Subject: libceph: add some fine ASCII art
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit bc18f4b1c850ab355e38373fbb60fd28568d84b5)
+
+Sage liked the state diagram I put in my commit description so
+I'm putting it in with the code.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   42 +++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 41 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -29,7 +29,47 @@
+  * the sender.
+  */
+-/* State values for ceph_connection->sock_state; NEW is assumed to be 0 */
++/*
++ * We track the state of the socket on a given connection using
++ * values defined below.  The transition to a new socket state is
++ * handled by a function which verifies we aren't coming from an
++ * unexpected state.
++ *
++ *      --------
++ *      | NEW* |  transient initial state
++ *      --------
++ *          | con_sock_state_init()
++ *          v
++ *      ----------
++ *      | CLOSED |  initialized, but no socket (and no
++ *      ----------  TCP connection)
++ *       ^      \
++ *       |       \ con_sock_state_connecting()
++ *       |        ----------------------
++ *       |                              \
++ *       + con_sock_state_closed()       \
++ *       |\                               \
++ *       | \                               \
++ *       |  -----------                     \
++ *       |  | CLOSING |  socket event;       \
++ *       |  -----------  await close          \
++ *       |       ^                            |
++ *       |       |                            |
++ *       |       + con_sock_state_closing()   |
++ *       |      / \                           |
++ *       |     /   ---------------            |
++ *       |    /                   \           v
++ *       |   /                    --------------
++ *       |  /    -----------------| CONNECTING |  socket created, TCP
++ *       |  |   /                 --------------  connect initiated
++ *       |  |   | con_sock_state_connected()
++ *       |  |   v
++ *      -------------
++ *      | CONNECTED |  TCP connection established
++ *      -------------
++ *
++ * State values for ceph_connection->sock_state; NEW is assumed to be 0.
++ */
+ #define CON_SOCK_STATE_NEW            0       /* -> CLOSED */
+ #define CON_SOCK_STATE_CLOSED         1       /* -> CONNECTING */
index a2f21cda4a2c6625c5f9dbcc479ad3a4804be319..cf692eb10b7db4a6515289f6bf6bdad9d636d7f9 100644 (file)
@@ -112,3 +112,23 @@ selinux-fix-sel_netnode_insert-suspicious-rcu-dereference.patch
 0057-libceph-fix-overflow-in-osdmap_decode.patch
 0058-libceph-fix-overflow-in-osdmap_apply_incremental.patch
 0059-libceph-transition-socket-state-prior-to-actual-conn.patch
+0060-libceph-fix-NULL-dereference-in-reset_connection.patch
+0061-libceph-use-con-get-put-methods.patch
+0062-libceph-drop-ceph_con_get-put-helpers-and-nref-membe.patch
+0063-libceph-encapsulate-out-message-data-setup.patch
+0064-libceph-encapsulate-advancing-msg-page.patch
+0065-libceph-don-t-mark-footer-complete-before-it-is.patch
+0066-libceph-move-init_bio_-functions-up.patch
+0067-libceph-move-init-of-bio_iter.patch
+0068-libceph-don-t-use-bio_iter-as-a-flag.patch
+0069-libceph-SOCK_CLOSED-is-a-flag-not-a-state.patch
+0070-libceph-don-t-change-socket-state-on-sock-event.patch
+0071-libceph-just-set-SOCK_CLOSED-when-state-changes.patch
+0072-libceph-don-t-touch-con-state-in-con_close_socket.patch
+0073-libceph-clear-CONNECTING-in-ceph_con_close.patch
+0074-libceph-clear-NEGOTIATING-when-done.patch
+0075-libceph-define-and-use-an-explicit-CONNECTED-state.patch
+0076-libceph-separate-banner-and-connect-writes.patch
+0077-libceph-distinguish-two-phases-of-connect-sequence.patch
+0078-libceph-small-changes-to-messenger.c.patch
+0079-libceph-add-some-fine-ASCII-art.patch