3.4-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 21 Nov 2012 20:15:24 +0000 (12:15 -0800)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 21 Nov 2012 20:15:24 +0000 (12:15 -0800)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 21 Nov 2012 20:15:24 +0000 (12:15 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 21 Nov 2012 20:15:24 +0000 (12:15 -0800)
diff --git a/queue-3.4/0100-libceph-fix-handling-of-immediate-socket-connect-fai.patch b/queue-3.4/0100-libceph-fix-handling-of-immediate-socket-connect-fai.patch

new file mode 100644 (file)

index 0000000..4fcb7d2
--- /dev/null
+++ b/queue-3.4/0100-libceph-fix-handling-of-immediate-socket-connect-fai.patch
@@ -0,0 +1,99 @@
+From 943c8fdafdac2d1de3e9e22729b335b1367497db Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 30 Jul 2012 18:16:16 -0700
+Subject: libceph: fix handling of immediate socket connect failure
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 8007b8d626b49c34fb146ec16dc639d8b10c862f)
+
+If the connect() call immediately fails such that sock == NULL, we
+still need con_close_socket() to reset our socket state to CLOSED.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   26 +++++++++++++++++++-------
+ 1 file changed, 19 insertions(+), 7 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -224,6 +224,8 @@ static void con_sock_state_init(struct c
+       old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+       if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
+               printk("%s: unexpected old state %d\n", __func__, old_state);
++      dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
++           CON_SOCK_STATE_CLOSED);
+ }
+ 
+ static void con_sock_state_connecting(struct ceph_connection *con)
+@@ -233,6 +235,8 @@ static void con_sock_state_connecting(st
+       old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
+       if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
+               printk("%s: unexpected old state %d\n", __func__, old_state);
++      dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
++           CON_SOCK_STATE_CONNECTING);
+ }
+ 
+ static void con_sock_state_connected(struct ceph_connection *con)
+@@ -242,6 +246,8 @@ static void con_sock_state_connected(str
+       old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
+       if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
+               printk("%s: unexpected old state %d\n", __func__, old_state);
++      dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
++           CON_SOCK_STATE_CONNECTED);
+ }
+ 
+ static void con_sock_state_closing(struct ceph_connection *con)
+@@ -253,6 +259,8 @@ static void con_sock_state_closing(struc
+                       old_state != CON_SOCK_STATE_CONNECTED &&
+                       old_state != CON_SOCK_STATE_CLOSING))
+               printk("%s: unexpected old state %d\n", __func__, old_state);
++      dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
++           CON_SOCK_STATE_CLOSING);
+ }
+ 
+ static void con_sock_state_closed(struct ceph_connection *con)
+@@ -262,8 +270,11 @@ static void con_sock_state_closed(struct
+       old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+       if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
+                   old_state != CON_SOCK_STATE_CLOSING &&
+-                  old_state != CON_SOCK_STATE_CONNECTING))
++                  old_state != CON_SOCK_STATE_CONNECTING &&
++                  old_state != CON_SOCK_STATE_CLOSED))
+               printk("%s: unexpected old state %d\n", __func__, old_state);
++      dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
++           CON_SOCK_STATE_CLOSED);
+ }
+ 
+ /*
+@@ -448,14 +459,14 @@ static int ceph_tcp_sendpage(struct sock
+  */
+ static int con_close_socket(struct ceph_connection *con)
+ {
+-      int rc;
++      int rc = 0;
+ 
+       dout("con_close_socket on %p sock %p\n", con, con->sock);
+-      if (!con->sock)
+-              return 0;
+-      rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
+-      sock_release(con->sock);
+-      con->sock = NULL;
++      if (con->sock) {
++              rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
++              sock_release(con->sock);
++              con->sock = NULL;
++      }
+ 
+       /*
+        * Forcibly clear the SOCK_CLOSED flag.  It gets set
+@@ -464,6 +475,7 @@ static int con_close_socket(struct ceph_
+        * shut the socket down.
+        */
+       clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
++
+       con_sock_state_closed(con);
+       return rc;
+ }
diff --git a/queue-3.4/0101-libceph-revoke-mon_client-messages-on-session-restar.patch b/queue-3.4/0101-libceph-revoke-mon_client-messages-on-session-restar.patch

new file mode 100644 (file)

index 0000000..e9cc152
--- /dev/null
+++ b/queue-3.4/0101-libceph-revoke-mon_client-messages-on-session-restar.patch
@@ -0,0 +1,40 @@
+From 76c4b09bee3b5ec29a233bb929b0c8d5eddf044f Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 30 Jul 2012 18:16:40 -0700
+Subject: libceph: revoke mon_client messages on session restart
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 4f471e4a9c7db0256834e1b376ea50c82e345c3c)
+
+Revoke all mon_client messages when we shut down the old connection.
+This is mostly moot since we are re-using the same ceph_connection,
+but it is cleaner.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/mon_client.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -118,6 +118,9 @@ static void __close_session(struct ceph_
+ {
+       dout("__close_session closing mon%d\n", monc->cur_mon);
+       ceph_msg_revoke(monc->m_auth);
++      ceph_msg_revoke_incoming(monc->m_auth_reply);
++      ceph_msg_revoke(monc->m_subscribe);
++      ceph_msg_revoke_incoming(monc->m_subscribe_ack);
+       ceph_con_close(&monc->con);
+       monc->cur_mon = -1;
+       monc->pending_auth = 0;
+@@ -685,6 +688,7 @@ static void __resend_generic_request(str
+       for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
+               req = rb_entry(p, struct ceph_mon_generic_request, node);
+               ceph_msg_revoke(req->request);
++              ceph_msg_revoke_incoming(req->reply);
+               ceph_con_send(&monc->con, ceph_msg_get(req->request));
+       }
+ }
diff --git a/queue-3.4/0102-libceph-verify-state-after-retaking-con-lock-after-d.patch b/queue-3.4/0102-libceph-verify-state-after-retaking-con-lock-after-d.patch

new file mode 100644 (file)

index 0000000..ded1d1f
--- /dev/null
+++ b/queue-3.4/0102-libceph-verify-state-after-retaking-con-lock-after-d.patch
@@ -0,0 +1,40 @@
+From b48298b26c610c074d7bbae2a2b72468bd82232d Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 30 Jul 2012 18:16:56 -0700
+Subject: libceph: verify state after retaking con lock after dispatch
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 7b862e07b1a4d5c963d19027f10ea78085f27f9b)
+
+We drop the con mutex when delivering a message.  When we retake the
+lock, we need to verify we are still in the OPEN state before
+preparing to read the next tag, or else we risk stepping on a
+connection that has been closed.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2003,7 +2003,6 @@ static void process_message(struct ceph_
+       con->ops->dispatch(con, msg);
+ 
+       mutex_lock(&con->mutex);
+-      prepare_read_tag(con);
+ }
+ 
+ 
+@@ -2213,6 +2212,8 @@ more:
+               if (con->in_tag == CEPH_MSGR_TAG_READY)
+                       goto more;
+               process_message(con);
++              if (con->state == CON_STATE_OPEN)
++                      prepare_read_tag(con);
+               goto more;
+       }
+       if (con->in_tag == CEPH_MSGR_TAG_ACK) {
diff --git a/queue-3.4/0103-libceph-avoid-dropping-con-mutex-before-fault.patch b/queue-3.4/0103-libceph-avoid-dropping-con-mutex-before-fault.patch

new file mode 100644 (file)

index 0000000..3698d59
--- /dev/null
+++ b/queue-3.4/0103-libceph-avoid-dropping-con-mutex-before-fault.patch
@@ -0,0 +1,45 @@
+From e857e5c6addd0fea4d145e818e24b1a75e41550f Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 30 Jul 2012 18:17:13 -0700
+Subject: libceph: avoid dropping con mutex before fault
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 8636ea672f0c5ab7478c42c5b6705ebd1db7eb6a)
+
+The ceph_fault() function takes the con mutex, so we should avoid
+dropping it before calling it.  This fixes a potential race with
+another thread calling ceph_con_close(), or _open(), or similar (we
+don't reverify con->state after retaking the lock).
+
+Add annotation so that lockdep realizes we will drop the mutex before
+returning.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2336,7 +2336,6 @@ done_unlocked:
+       return;
+ 
+ fault:
+-      mutex_unlock(&con->mutex);
+       ceph_fault(con);     /* error/fault path */
+       goto done_unlocked;
+ }
+@@ -2347,9 +2346,8 @@ fault:
+  * exponential backoff
+  */
+ static void ceph_fault(struct ceph_connection *con)
++      __releases(con->mutex)
+ {
+-      mutex_lock(&con->mutex);
+-
+       pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
+              ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+       dout("fault %p state %lu to peer %s\n",
diff --git a/queue-3.4/0104-libceph-change-ceph_con_in_msg_alloc-convention-to-b.patch b/queue-3.4/0104-libceph-change-ceph_con_in_msg_alloc-convention-to-b.patch

new file mode 100644 (file)

index 0000000..c819d37
--- /dev/null
+++ b/queue-3.4/0104-libceph-change-ceph_con_in_msg_alloc-convention-to-b.patch
@@ -0,0 +1,142 @@
+From 8e27649af41cc8b9e8a1e4a510a24491bbf66525 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 30 Jul 2012 18:19:30 -0700
+Subject: libceph: change ceph_con_in_msg_alloc convention to be less weird
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 4740a623d20c51d167da7f752b63e2b8714b2543)
+
+This function's calling convention is very limiting.  In particular,
+we can't return any error other than ENOMEM (and only implicitly),
+which is a problem (see next patch).
+
+Instead, return a normal 0 or error code, and make the skip a pointer
+output parameter.  Drop the useless in_hdr argument (we have the con
+pointer).
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   56 ++++++++++++++++++++++++++++-----------------------
+ 1 file changed, 31 insertions(+), 25 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1733,9 +1733,7 @@ static int read_partial_message_section(
+       return 1;
+ }
+ 
+-static bool ceph_con_in_msg_alloc(struct ceph_connection *con,
+-                              struct ceph_msg_header *hdr);
+-
++static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
+ 
+ static int read_partial_message_pages(struct ceph_connection *con,
+                                     struct page **pages,
+@@ -1864,9 +1862,14 @@ static int read_partial_message(struct c
+ 
+       /* allocate message? */
+       if (!con->in_msg) {
++              int skip = 0;
++
+               dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
+                    con->in_hdr.front_len, con->in_hdr.data_len);
+-              if (ceph_con_in_msg_alloc(con, &con->in_hdr)) {
++              ret = ceph_con_in_msg_alloc(con, &skip);
++              if (ret < 0)
++                      return ret;
++              if (skip) {
+                       /* skip this message */
+                       dout("alloc_msg said skip message\n");
+                       BUG_ON(con->in_msg);
+@@ -1876,12 +1879,8 @@ static int read_partial_message(struct c
+                       con->in_seq++;
+                       return 0;
+               }
+-              if (!con->in_msg) {
+-                      con->error_msg =
+-                              "error allocating memory for incoming message";
+-                      return -ENOMEM;
+-              }
+ 
++              BUG_ON(!con->in_msg);
+               BUG_ON(con->in_msg->con != con);
+               m = con->in_msg;
+               m->front.iov_len = 0;    /* haven't read it yet */
+@@ -2715,43 +2714,50 @@ static int ceph_alloc_middle(struct ceph
+  * connection, and save the result in con->in_msg.  Uses the
+  * connection's private alloc_msg op if available.
+  *
+- * Returns true if the message should be skipped, false otherwise.
+- * If true is returned (skip message), con->in_msg will be NULL.
+- * If false is returned, con->in_msg will contain a pointer to the
+- * newly-allocated message, or NULL in case of memory exhaustion.
++ * Returns 0 on success, or a negative error code.
++ *
++ * On success, if we set *skip = 1:
++ *  - the next message should be skipped and ignored.
++ *  - con->in_msg == NULL
++ * or if we set *skip = 0:
++ *  - con->in_msg is non-null.
++ * On error (ENOMEM, EAGAIN, ...),
++ *  - con->in_msg == NULL
+  */
+-static bool ceph_con_in_msg_alloc(struct ceph_connection *con,
+-                              struct ceph_msg_header *hdr)
++static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
+ {
++      struct ceph_msg_header *hdr = &con->in_hdr;
+       int type = le16_to_cpu(hdr->type);
+       int front_len = le32_to_cpu(hdr->front_len);
+       int middle_len = le32_to_cpu(hdr->middle_len);
+-      int ret;
++      int ret = 0;
+ 
+       BUG_ON(con->in_msg != NULL);
+ 
+       if (con->ops->alloc_msg) {
+-              int skip = 0;
+-
+               mutex_unlock(&con->mutex);
+-              con->in_msg = con->ops->alloc_msg(con, hdr, &skip);
++              con->in_msg = con->ops->alloc_msg(con, hdr, skip);
+               mutex_lock(&con->mutex);
+               if (con->in_msg) {
+                       con->in_msg->con = con->ops->get(con);
+                       BUG_ON(con->in_msg->con == NULL);
+               }
+-              if (skip)
++              if (*skip) {
+                       con->in_msg = NULL;
+-
+-              if (!con->in_msg)
+-                      return skip != 0;
++                      return 0;
++              }
++              if (!con->in_msg) {
++                      con->error_msg =
++                              "error allocating memory for incoming message";
++                      return -ENOMEM;
++              }
+       }
+       if (!con->in_msg) {
+               con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
+               if (!con->in_msg) {
+                       pr_err("unable to allocate msg type %d len %d\n",
+                              type, front_len);
+-                      return false;
++                      return -ENOMEM;
+               }
+               con->in_msg->con = con->ops->get(con);
+               BUG_ON(con->in_msg->con == NULL);
+@@ -2767,7 +2773,7 @@ static bool ceph_con_in_msg_alloc(struct
+               }
+       }
+ 
+-      return false;
++      return ret;
+ }
+ 
+ 
diff --git a/queue-3.4/0105-libceph-recheck-con-state-after-allocating-incoming-.patch b/queue-3.4/0105-libceph-recheck-con-state-after-allocating-incoming-.patch

new file mode 100644 (file)

index 0000000..c6833ef
--- /dev/null
+++ b/queue-3.4/0105-libceph-recheck-con-state-after-allocating-incoming-.patch
@@ -0,0 +1,42 @@
+From 80a2e83ea3db4f98b2875490193f1e602c5e8252 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 30 Jul 2012 18:19:45 -0700
+Subject: libceph: recheck con state after allocating incoming message
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 6139919133377652992a5fe134e22abce3e9c25e)
+
+We drop the lock when calling the ->alloc_msg() con op, which means
+we need to (a) not clobber con->in_msg without the mutex held, and (b)
+we need to verify that we are still in the OPEN state when we retake
+it to avoid causing any mayhem.  If the state does change, -EAGAIN
+will get us back to con_work() and loop.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2735,9 +2735,16 @@ static int ceph_con_in_msg_alloc(struct
+       BUG_ON(con->in_msg != NULL);
+ 
+       if (con->ops->alloc_msg) {
++              struct ceph_msg *msg;
++
+               mutex_unlock(&con->mutex);
+-              con->in_msg = con->ops->alloc_msg(con, hdr, skip);
++              msg = con->ops->alloc_msg(con, hdr, skip);
+               mutex_lock(&con->mutex);
++              if (con->state != CON_STATE_OPEN) {
++                      ceph_msg_put(msg);
++                      return -EAGAIN;
++              }
++              con->in_msg = msg;
+               if (con->in_msg) {
+                       con->in_msg->con = con->ops->get(con);
+                       BUG_ON(con->in_msg->con == NULL);
diff --git a/queue-3.4/0106-libceph-fix-crypto-key-null-deref-memory-leak.patch b/queue-3.4/0106-libceph-fix-crypto-key-null-deref-memory-leak.patch

new file mode 100644 (file)

index 0000000..9626a1b
--- /dev/null
+++ b/queue-3.4/0106-libceph-fix-crypto-key-null-deref-memory-leak.patch
@@ -0,0 +1,43 @@
+From b7dfcf07b04fda8c8ba6cf17ea5477e9b79f9bc8 Mon Sep 17 00:00:00 2001
+From: Sylvain Munaut <tnt@246tNt.com>
+Date: Thu, 2 Aug 2012 09:12:59 -0700
+Subject: libceph: fix crypto key null deref, memory leak
+
+From: Sylvain Munaut <tnt@246tNt.com>
+
+(cherry picked from commit f0666b1ac875ff32fe290219b150ec62eebbe10e)
+
+Avoid crashing if the crypto key payload was NULL, as when it was not correctly
+allocated and initialized.  Also, avoid leaking it.
+
+Signed-off-by: Sylvain Munaut <tnt@246tNt.com>
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/crypto.c |    1 +
+ net/ceph/crypto.h |    3 ++-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/crypto.c
++++ b/net/ceph/crypto.c
+@@ -466,6 +466,7 @@ void ceph_key_destroy(struct key *key) {
+       struct ceph_crypto_key *ckey = key->payload.data;
+ 
+       ceph_crypto_key_destroy(ckey);
++      kfree(ckey);
+ }
+ 
+ struct key_type key_type_ceph = {
+--- a/net/ceph/crypto.h
++++ b/net/ceph/crypto.h
+@@ -16,7 +16,8 @@ struct ceph_crypto_key {
+ 
+ static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
+ {
+-      kfree(key->key);
++      if (key)
++              kfree(key->key);
+ }
+ 
+ extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
diff --git a/queue-3.4/0107-libceph-delay-debugfs-initialization-until-we-learn-.patch b/queue-3.4/0107-libceph-delay-debugfs-initialization-until-we-learn-.patch

new file mode 100644 (file)

index 0000000..6684764
--- /dev/null
+++ b/queue-3.4/0107-libceph-delay-debugfs-initialization-until-we-learn-.patch
@@ -0,0 +1,161 @@
+From 05dd9ae3b48666e55d7de0cbdd3521099495fd85 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Sun, 19 Aug 2012 12:29:16 -0700
+Subject: libceph: delay debugfs initialization until we learn global_id
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit d1c338a509cea5378df59629ad47382810c38623)
+
+The debugfs directory includes the cluster fsid and our unique global_id.
+We need to delay the initialization of the debug entry until we have
+learned both the fsid and our global_id from the monitor or else the
+second client can't create its debugfs entry and will fail (and multiple
+client instances aren't properly reflected in debugfs).
+
+Reported-by: Yan, Zheng <zheng.z.yan@intel.com>
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/debugfs.c      |    1 
+ net/ceph/ceph_common.c |    1 
+ net/ceph/debugfs.c     |    4 +++
+ net/ceph/mon_client.c  |   51 ++++++++++++++++++++++++++++++++++++++++++++-----
+ 4 files changed, 51 insertions(+), 6 deletions(-)
+
+--- a/fs/ceph/debugfs.c
++++ b/fs/ceph/debugfs.c
+@@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_
+       int err = -ENOMEM;
+ 
+       dout("ceph_fs_debugfs_init\n");
++      BUG_ON(!fsc->client->debugfs_dir);
+       fsc->debugfs_congestion_kb =
+               debugfs_create_file("writeback_congestion_kb",
+                                   0600,
+--- a/net/ceph/ceph_common.c
++++ b/net/ceph/ceph_common.c
+@@ -83,7 +83,6 @@ int ceph_check_fsid(struct ceph_client *
+                       return -1;
+               }
+       } else {
+-              pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
+               memcpy(&client->fsid, fsid, sizeof(*fsid));
+       }
+       return 0;
+--- a/net/ceph/debugfs.c
++++ b/net/ceph/debugfs.c
+@@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph
+       snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
+                client->monc.auth->global_id);
+ 
++      dout("ceph_debugfs_client_init %p %s\n", client, name);
++
++      BUG_ON(client->debugfs_dir);
+       client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
+       if (!client->debugfs_dir)
+               goto out;
+@@ -234,6 +237,7 @@ out:
+ 
+ void ceph_debugfs_client_cleanup(struct ceph_client *client)
+ {
++      dout("ceph_debugfs_client_cleanup %p\n", client);
+       debugfs_remove(client->debugfs_osdmap);
+       debugfs_remove(client->debugfs_monmap);
+       debugfs_remove(client->osdc.debugfs_file);
+--- a/net/ceph/mon_client.c
++++ b/net/ceph/mon_client.c
+@@ -311,6 +311,17 @@ int ceph_monc_open_session(struct ceph_m
+ EXPORT_SYMBOL(ceph_monc_open_session);
+ 
+ /*
++ * We require the fsid and global_id in order to initialize our
++ * debugfs dir.
++ */
++static bool have_debugfs_info(struct ceph_mon_client *monc)
++{
++      dout("have_debugfs_info fsid %d globalid %lld\n",
++           (int)monc->client->have_fsid, monc->auth->global_id);
++      return monc->client->have_fsid && monc->auth->global_id > 0;
++}
++
++/*
+  * The monitor responds with mount ack indicate mount success.  The
+  * included client ticket allows the client to talk to MDSs and OSDs.
+  */
+@@ -320,9 +331,12 @@ static void ceph_monc_handle_map(struct
+       struct ceph_client *client = monc->client;
+       struct ceph_monmap *monmap = NULL, *old = monc->monmap;
+       void *p, *end;
++      int had_debugfs_info, init_debugfs = 0;
+ 
+       mutex_lock(&monc->mutex);
+ 
++      had_debugfs_info = have_debugfs_info(monc);
++
+       dout("handle_monmap\n");
+       p = msg->front.iov_base;
+       end = p + msg->front.iov_len;
+@@ -344,12 +358,22 @@ static void ceph_monc_handle_map(struct
+ 
+       if (!client->have_fsid) {
+               client->have_fsid = true;
++              if (!had_debugfs_info && have_debugfs_info(monc)) {
++                      pr_info("client%lld fsid %pU\n",
++                              ceph_client_id(monc->client),
++                              &monc->client->fsid);
++                      init_debugfs = 1;
++              }
+               mutex_unlock(&monc->mutex);
+-              /*
+-               * do debugfs initialization without mutex to avoid
+-               * creating a locking dependency
+-               */
+-              ceph_debugfs_client_init(client);
++
++              if (init_debugfs) {
++                      /*
++                       * do debugfs initialization without mutex to avoid
++                       * creating a locking dependency
++                       */
++                      ceph_debugfs_client_init(monc->client);
++              }
++
+               goto out_unlocked;
+       }
+ out:
+@@ -865,8 +889,10 @@ static void handle_auth_reply(struct cep
+ {
+       int ret;
+       int was_auth = 0;
++      int had_debugfs_info, init_debugfs = 0;
+ 
+       mutex_lock(&monc->mutex);
++      had_debugfs_info = have_debugfs_info(monc);
+       if (monc->auth->ops)
+               was_auth = monc->auth->ops->is_authenticated(monc->auth);
+       monc->pending_auth = 0;
+@@ -889,7 +915,22 @@ static void handle_auth_reply(struct cep
+               __send_subscribe(monc);
+               __resend_generic_request(monc);
+       }
++
++      if (!had_debugfs_info && have_debugfs_info(monc)) {
++              pr_info("client%lld fsid %pU\n",
++                      ceph_client_id(monc->client),
++                      &monc->client->fsid);
++              init_debugfs = 1;
++      }
+       mutex_unlock(&monc->mutex);
++
++      if (init_debugfs) {
++              /*
++               * do debugfs initialization without mutex to avoid
++               * creating a locking dependency
++               */
++              ceph_debugfs_client_init(monc->client);
++      }
+ }
+ 
+ static int __validate_auth(struct ceph_mon_client *monc)
diff --git a/queue-3.4/0108-libceph-avoid-truncation-due-to-racing-banners.patch b/queue-3.4/0108-libceph-avoid-truncation-due-to-racing-banners.patch

new file mode 100644 (file)

index 0000000..297e262
--- /dev/null
+++ b/queue-3.4/0108-libceph-avoid-truncation-due-to-racing-banners.patch
@@ -0,0 +1,105 @@
+From 3d77d4329854550407f153d8ab910a83b54895ac Mon Sep 17 00:00:00 2001
+From: Jim Schutt <jaschut@sandia.gov>
+Date: Fri, 10 Aug 2012 10:37:38 -0700
+Subject: libceph: avoid truncation due to racing banners
+
+From: Jim Schutt <jaschut@sandia.gov>
+
+(cherry picked from commit 6d4221b53707486dfad3f5bfe568d2ce7f4c9863)
+
+Because the Ceph client messenger uses a non-blocking connect, it is
+possible for the sending of the client banner to race with the
+arrival of the banner sent by the peer.
+
+When ceph_sock_state_change() notices the connect has completed, it
+schedules work to process the socket via con_work().  During this
+time the peer is writing its banner, and arrival of the peer banner
+races with con_work().
+
+If con_work() calls try_read() before the peer banner arrives, there
+is nothing for it to do, after which con_work() calls try_write() to
+send the client's banner.  In this case Ceph's protocol negotiation
+can complete successfully.
+
+The server-side messenger immediately sends its banner and addresses
+after accepting a connect request, *before* actually attempting to
+read or verify the banner from the client.  As a result, it is
+possible for the banner from the server to arrive before con_work()
+calls try_read().  If that happens, try_read() will read the banner
+and prepare protocol negotiation info via prepare_write_connect().
+prepare_write_connect() calls con_out_kvec_reset(), which discards
+the as-yet-unsent client banner.  Next, con_work() calls
+try_write(), which sends the protocol negotiation info rather than
+the banner that the peer is expecting.
+
+The result is that the peer sees an invalid banner, and the client
+reports "negotiation failed".
+
+Fix this by moving con_out_kvec_reset() out of
+prepare_write_connect() to its callers at all locations except the
+one where the banner might still need to be sent.
+
+[elder@inktank.com: added note about server-side behavior]
+
+Signed-off-by: Jim Schutt <jaschut@sandia.gov>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -911,7 +911,6 @@ static int prepare_write_connect(struct
+       con->out_connect.authorizer_len = auth ?
+               cpu_to_le32(auth->authorizer_buf_len) : 0;
+ 
+-      con_out_kvec_reset(con);
+       con_out_kvec_add(con, sizeof (con->out_connect),
+                                       &con->out_connect);
+       if (auth && auth->authorizer_buf_len)
+@@ -1553,6 +1552,7 @@ static int process_connect(struct ceph_c
+                       return -1;
+               }
+               con->auth_retry = 1;
++              con_out_kvec_reset(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       return ret;
+@@ -1573,6 +1573,7 @@ static int process_connect(struct ceph_c
+                      ENTITY_NAME(con->peer_name),
+                      ceph_pr_addr(&con->peer_addr.in_addr));
+               reset_connection(con);
++              con_out_kvec_reset(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       return ret;
+@@ -1597,6 +1598,7 @@ static int process_connect(struct ceph_c
+                    le32_to_cpu(con->out_connect.connect_seq),
+                    le32_to_cpu(con->in_reply.connect_seq));
+               con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
++              con_out_kvec_reset(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       return ret;
+@@ -1613,6 +1615,7 @@ static int process_connect(struct ceph_c
+                    le32_to_cpu(con->in_reply.global_seq));
+               get_global_seq(con->msgr,
+                              le32_to_cpu(con->in_reply.global_seq));
++              con_out_kvec_reset(con);
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       return ret;
+@@ -2131,7 +2134,11 @@ more:
+               BUG_ON(con->state != CON_STATE_CONNECTING);
+               con->state = CON_STATE_NEGOTIATING;
+ 
+-              /* Banner is good, exchange connection info */
++              /*
++               * Received banner is good, exchange connection info.
++               * Do not reset out_kvec, as sending our banner raced
++               * with receiving peer banner after connect completed.
++               */
+               ret = prepare_write_connect(con);
+               if (ret < 0)
+                       goto out;
diff --git a/queue-3.4/0109-libceph-only-kunmap-kmapped-pages.patch b/queue-3.4/0109-libceph-only-kunmap-kmapped-pages.patch

new file mode 100644 (file)

index 0000000..9838953
--- /dev/null
+++ b/queue-3.4/0109-libceph-only-kunmap-kmapped-pages.patch
@@ -0,0 +1,48 @@
+From 16d4243614b9ce6df10fdf00f04b60260d136a8f Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Fri, 21 Sep 2012 17:59:58 -0500
+Subject: libceph: only kunmap kmapped pages
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 5ce765a540f34d1e2005e1210f49f67fdf11e997)
+
+In write_partial_msg_pages(), pages need to be kmapped in order to
+perform a CRC-32c calculation on them.  As an artifact of the way
+this code used to be structured, the kunmap() call was separated
+from the kmap() call and both were done conditionally.  But the
+conditions under which the kmap() and kunmap() calls were made
+differed, so there was a chance a kunmap() call would be done on a
+page that had not been mapped.
+
+The symptom of this was tripping a BUG() in kunmap_high() when
+pkmap_count[nr] became 0.
+
+Reported-by: Bryan K. Wright <bryan@virginia.edu>
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1069,16 +1069,13 @@ static int write_partial_msg_pages(struc
+                       BUG_ON(kaddr == NULL);
+                       base = kaddr + con->out_msg_pos.page_pos + bio_offset;
+                       crc = crc32c(crc, base, len);
++                      kunmap(page);
+                       msg->footer.data_crc = cpu_to_le32(crc);
+                       con->out_msg_pos.did_page_crc = true;
+               }
+               ret = ceph_tcp_sendpage(con->sock, page,
+                                     con->out_msg_pos.page_pos + bio_offset,
+                                     len, 1);
+-
+-              if (do_datacrc)
+-                      kunmap(page);
+-
+               if (ret <= 0)
+                       goto out;
+ 
diff --git a/queue-3.4/0110-rbd-reset-BACKOFF-if-unable-to-re-queue.patch b/queue-3.4/0110-rbd-reset-BACKOFF-if-unable-to-re-queue.patch

new file mode 100644 (file)

index 0000000..ca1b486
--- /dev/null
+++ b/queue-3.4/0110-rbd-reset-BACKOFF-if-unable-to-re-queue.patch
@@ -0,0 +1,58 @@
+From 21440342ecfb1754307048e554227da17af79501 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Mon, 8 Oct 2012 20:37:30 -0700
+Subject: rbd: reset BACKOFF if unable to re-queue
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 588377d6199034c36d335e7df5818b731fea072c)
+
+If ceph_fault() is unable to queue work after a delay, it sets the
+BACKOFF connection flag so con_work() will attempt to do so.
+
+In con_work(), when BACKOFF is set, if queue_delayed_work() doesn't
+result in newly-queued work, it simply ignores this condition and
+proceeds as if no backoff delay were desired.  There are two
+problems with this--one of which is a bug.
+
+The first problem is simply that the intended behavior is to back
+off, and if we aren't able to queue the work item to run after a delay
+we're not doing that.
+
+The only reason queue_delayed_work() won't queue work is if the
+provided work item is already queued.  In the messenger, this
+means that con_work() is already scheduled to be run again.  So
+if we simply set the BACKOFF flag again when this occurs, we know
+the next con_work() call will again attempt to hold off activity
+on the connection until after the delay.
+
+The second problem--the bug--is a leak of a reference count.  If
+queue_delayed_work() returns 0 in con_work(), con->ops->put() drops
+the connection reference held on entry to con_work().  However,
+processing is (was) allowed to continue, and at the end of the
+function a second con->ops->put() is called.
+
+This patch fixes both problems.
+
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2296,10 +2296,11 @@ restart:
+                       mutex_unlock(&con->mutex);
+                       return;
+               } else {
+-                      con->ops->put(con);
+                       dout("con_work %p FAILED to back off %lu\n", con,
+                            con->delay);
++                      set_bit(CON_FLAG_BACKOFF, &con->flags);
+               }
++              goto done;
+       }
+ 
+       if (con->state == CON_STATE_STANDBY) {
diff --git a/queue-3.4/0111-libceph-avoid-NULL-kref_put-when-osd-reset-races-wit.patch b/queue-3.4/0111-libceph-avoid-NULL-kref_put-when-osd-reset-races-wit.patch

new file mode 100644 (file)

index 0000000..5a23a9d
--- /dev/null
+++ b/queue-3.4/0111-libceph-avoid-NULL-kref_put-when-osd-reset-races-wit.patch
@@ -0,0 +1,36 @@
+From 5959b68da32488a3031c51a43df0750acd11d644 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Wed, 24 Oct 2012 16:12:58 -0700
+Subject: libceph: avoid NULL kref_put when osd reset races with alloc_msg
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit 9bd952615a42d7e2ce3fa2c632e808e804637a1a)
+
+The ceph_con_in_msg_alloc() method drops con->mutex while it allocates a
+message.  If that races with a timeout that resends a zillion messages and
+resets the connection, and the ->alloc_msg() method returns a NULL message,
+it will call ceph_msg_put(NULL) and BUG.
+
+Fix by only calling put if msg is non-NULL.
+
+Fixes http://tracker.newdream.net/issues/3142
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2746,7 +2746,8 @@ static int ceph_con_in_msg_alloc(struct
+               msg = con->ops->alloc_msg(con, hdr, skip);
+               mutex_lock(&con->mutex);
+               if (con->state != CON_STATE_OPEN) {
+-                      ceph_msg_put(msg);
++                      if (msg)
++                              ceph_msg_put(msg);
+                       return -EAGAIN;
+               }
+               con->in_msg = msg;
diff --git a/queue-3.4/0112-ceph-Fix-oops-when-handling-mdsmap-that-decreases-ma.patch b/queue-3.4/0112-ceph-Fix-oops-when-handling-mdsmap-that-decreases-ma.patch

new file mode 100644 (file)

index 0000000..c732dc0
--- /dev/null
+++ b/queue-3.4/0112-ceph-Fix-oops-when-handling-mdsmap-that-decreases-ma.patch
@@ -0,0 +1,31 @@
+From 69e87a5dfec164710d7e5dc5d7da4c2ac2abb754 Mon Sep 17 00:00:00 2001
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+Date: Thu, 20 Sep 2012 17:42:25 +0800
+Subject: ceph: Fix oops when handling mdsmap that decreases max_mds
+
+From: "Yan, Zheng" <zheng.z.yan@intel.com>
+
+(cherry picked from commit 3e8f43a089f06279c5f76a9ccd42578eebf7bfa5)
+
+When i >= newmap->m_max_mds, ceph_mdsmap_get_addr(newmap, i) returns
+NULL. Passing NULL to memcmp() triggers an oops.
+
+Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
+Signed-off-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/mds_client.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -2634,7 +2634,8 @@ static void check_new_map(struct ceph_md
+                    ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
+                    session_state_name(s->s_state));
+ 
+-              if (memcmp(ceph_mdsmap_get_addr(oldmap, i),
++              if (i >= newmap->m_max_mds ||
++                  memcmp(ceph_mdsmap_get_addr(oldmap, i),
+                          ceph_mdsmap_get_addr(newmap, i),
+                          sizeof(struct ceph_entity_addr))) {
+                       if (s->s_state == CEPH_MDS_SESSION_OPENING) {
diff --git a/queue-3.4/0113-libceph-check-for-invalid-mapping.patch b/queue-3.4/0113-libceph-check-for-invalid-mapping.patch

new file mode 100644 (file)

index 0000000..5a41b4a
--- /dev/null
+++ b/queue-3.4/0113-libceph-check-for-invalid-mapping.patch
@@ -0,0 +1,165 @@
+From 4a8f60298277d7f7d3347ebae67b5a8705f36a9c Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@inktank.com>
+Date: Mon, 24 Sep 2012 20:59:48 -0700
+Subject: libceph: check for invalid mapping
+
+From: Sage Weil <sage@inktank.com>
+
+(cherry picked from commit d63b77f4c552cc3a20506871046ab0fcbc332609)
+
+If we encounter an invalid (e.g., zeroed) mapping, return an error
+and avoid a divide by zero.
+
+Signed-off-by: Sage Weil <sage@inktank.com>
+Reviewed-by: Alex Elder <elder@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/osd_client.h |    2 +-
+ include/linux/ceph/osdmap.h     |    6 +++---
+ net/ceph/osd_client.c           |   32 ++++++++++++++++++++------------
+ net/ceph/osdmap.c               |   18 ++++++++++++++++--
+ 4 files changed, 40 insertions(+), 18 deletions(-)
+
+--- a/include/linux/ceph/osd_client.h
++++ b/include/linux/ceph/osd_client.h
+@@ -207,7 +207,7 @@ extern void ceph_osdc_handle_reply(struc
+ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
+                                struct ceph_msg *msg);
+ 
+-extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
++extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
+                       struct ceph_file_layout *layout,
+                       u64 snapid,
+                       u64 off, u64 *plen, u64 *bno,
+--- a/include/linux/ceph/osdmap.h
++++ b/include/linux/ceph/osdmap.h
+@@ -111,9 +111,9 @@ extern struct ceph_osdmap *osdmap_apply_
+ extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
+ 
+ /* calculate mapping of a file extent to an object */
+-extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
+-                                        u64 off, u64 *plen,
+-                                        u64 *bno, u64 *oxoff, u64 *oxlen);
++extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
++                                       u64 off, u64 *plen,
++                                       u64 *bno, u64 *oxoff, u64 *oxlen);
+ 
+ /* calculate mapping of object to a placement group */
+ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -52,7 +52,7 @@ static int op_has_extent(int op)
+               op == CEPH_OSD_OP_WRITE);
+ }
+ 
+-void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
++int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
+                       struct ceph_file_layout *layout,
+                       u64 snapid,
+                       u64 off, u64 *plen, u64 *bno,
+@@ -62,12 +62,15 @@ void ceph_calc_raw_layout(struct ceph_os
+       struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
+       u64 orig_len = *plen;
+       u64 objoff, objlen;    /* extent in object */
++      int r;
+ 
+       reqhead->snapid = cpu_to_le64(snapid);
+ 
+       /* object extent? */
+-      ceph_calc_file_object_mapping(layout, off, plen, bno,
+-                                    &objoff, &objlen);
++      r = ceph_calc_file_object_mapping(layout, off, plen, bno,
++                                        &objoff, &objlen);
++      if (r < 0)
++              return r;
+       if (*plen < orig_len)
+               dout(" skipping last %llu, final file extent %llu~%llu\n",
+                    orig_len - *plen, off, *plen);
+@@ -83,7 +86,7 @@ void ceph_calc_raw_layout(struct ceph_os
+ 
+       dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
+            *bno, objoff, objlen, req->r_num_pages);
+-
++      return 0;
+ }
+ EXPORT_SYMBOL(ceph_calc_raw_layout);
+ 
+@@ -112,20 +115,25 @@ EXPORT_SYMBOL(ceph_calc_raw_layout);
+  *
+  * fill osd op in request message.
+  */
+-static void calc_layout(struct ceph_osd_client *osdc,
+-                      struct ceph_vino vino,
+-                      struct ceph_file_layout *layout,
+-                      u64 off, u64 *plen,
+-                      struct ceph_osd_request *req,
+-                      struct ceph_osd_req_op *op)
++static int calc_layout(struct ceph_osd_client *osdc,
++                     struct ceph_vino vino,
++                     struct ceph_file_layout *layout,
++                     u64 off, u64 *plen,
++                     struct ceph_osd_request *req,
++                     struct ceph_osd_req_op *op)
+ {
+       u64 bno;
++      int r;
+ 
+-      ceph_calc_raw_layout(osdc, layout, vino.snap, off,
+-                           plen, &bno, req, op);
++      r = ceph_calc_raw_layout(osdc, layout, vino.snap, off,
++                               plen, &bno, req, op);
++      if (r < 0)
++              return r;
+ 
+       snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
+       req->r_oid_len = strlen(req->r_oid);
++
++      return r;
+ }
+ 
+ /*
+--- a/net/ceph/osdmap.c
++++ b/net/ceph/osdmap.c
+@@ -952,7 +952,7 @@ bad:
+  * for now, we write only a single su, until we can
+  * pass a stride back to the caller.
+  */
+-void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
++int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
+                                  u64 off, u64 *plen,
+                                  u64 *ono,
+                                  u64 *oxoff, u64 *oxlen)
+@@ -966,11 +966,17 @@ void ceph_calc_file_object_mapping(struc
+ 
+       dout("mapping %llu~%llu  osize %u fl_su %u\n", off, *plen,
+            osize, su);
++      if (su == 0 || sc == 0)
++              goto invalid;
+       su_per_object = osize / su;
++      if (su_per_object == 0)
++              goto invalid;
+       dout("osize %u / su %u = su_per_object %u\n", osize, su,
+            su_per_object);
+ 
+-      BUG_ON((su & ~PAGE_MASK) != 0);
++      if ((su & ~PAGE_MASK) != 0)
++              goto invalid;
++
+       /* bl = *off / su; */
+       t = off;
+       do_div(t, su);
+@@ -998,6 +1004,14 @@ void ceph_calc_file_object_mapping(struc
+       *plen = *oxlen;
+ 
+       dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
++      return 0;
++
++invalid:
++      dout(" invalid layout\n");
++      *ono = 0;
++      *oxoff = 0;
++      *oxlen = 0;
++      return -EINVAL;
+ }
+ EXPORT_SYMBOL(ceph_calc_file_object_mapping);
+ 
diff --git a/queue-3.4/0114-ceph-avoid-32-bit-page-index-overflow.patch b/queue-3.4/0114-ceph-avoid-32-bit-page-index-overflow.patch

new file mode 100644 (file)

index 0000000..46532aa
--- /dev/null
+++ b/queue-3.4/0114-ceph-avoid-32-bit-page-index-overflow.patch
@@ -0,0 +1,78 @@
+From b2980af7974e38de2db64094fd6a90c8f0f06d08 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@inktank.com>
+Date: Tue, 2 Oct 2012 10:25:51 -0500
+Subject: ceph: avoid 32-bit page index overflow
+
+From: Alex Elder <elder@inktank.com>
+
+(cherry picked from commit 6285bc231277419255f3498d3eb5ddc9f8e7fe79)
+
+A pgoff_t is defined (by default) to have type (unsigned long).  On
+architectures such as i686 that's a 32-bit type.  The ceph address
+space code was attempting to produce 64 bit offsets by shifting a
+page's index by PAGE_CACHE_SHIFT, but the result was not what was
+desired because the shift occurred before the result got promoted
+to 64 bits.
+
+Fix this by converting all uses of page->index used in this way to
+use the page_offset() macro, which ensures the 64-bit result has the
+intended value.
+
+This fixes http://tracker.newdream.net/issues/3112
+
+Reported-by:  Mohamed Pakkeer <pakkeer.mohideen@realimage.com>
+Signed-off-by: Alex Elder <elder@inktank.com>
+Reviewed-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/addr.c |   11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/fs/ceph/addr.c
++++ b/fs/ceph/addr.c
+@@ -205,7 +205,7 @@ static int readpage_nounlock(struct file
+       dout("readpage inode %p file %p page %p index %lu\n",
+            inode, filp, page, page->index);
+       err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
+-                                page->index << PAGE_CACHE_SHIFT, &len,
++                                (u64) page_offset(page), &len,
+                                 ci->i_truncate_seq, ci->i_truncate_size,
+                                 &page, 1, 0);
+       if (err == -ENOENT)
+@@ -286,7 +286,7 @@ static int start_read(struct inode *inod
+       int nr_pages = 0;
+       int ret;
+ 
+-      off = page->index << PAGE_CACHE_SHIFT;
++      off = (u64) page_offset(page);
+ 
+       /* count pages */
+       next_index = page->index;
+@@ -426,7 +426,7 @@ static int writepage_nounlock(struct pag
+       struct ceph_inode_info *ci;
+       struct ceph_fs_client *fsc;
+       struct ceph_osd_client *osdc;
+-      loff_t page_off = page->index << PAGE_CACHE_SHIFT;
++      loff_t page_off = page_offset(page);
+       int len = PAGE_CACHE_SIZE;
+       loff_t i_size;
+       int err = 0;
+@@ -817,8 +817,7 @@ get_more_pages:
+                       /* ok */
+                       if (locked_pages == 0) {
+                               /* prepare async write request */
+-                              offset = (unsigned long long)page->index
+-                                      << PAGE_CACHE_SHIFT;
++                              offset = (u64) page_offset(page);
+                               len = wsize;
+                               req = ceph_osdc_new_request(&fsc->client->osdc,
+                                           &ci->i_layout,
+@@ -1180,7 +1179,7 @@ static int ceph_page_mkwrite(struct vm_a
+       struct inode *inode = vma->vm_file->f_dentry->d_inode;
+       struct page *page = vmf->page;
+       struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+-      loff_t off = page->index << PAGE_CACHE_SHIFT;
++      loff_t off = page_offset(page);
+       loff_t size, len;
+       int ret;
+ 
diff --git a/queue-3.4/series b/queue-3.4/series

index 492fb7dda1118cf214b3d0d2f4fec33115ce41ae..b4dd0cc883ba79b5221f104270c5f8357d800631 100644 (file)
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -152,3 +152,18 @@ selinux-fix-sel_netnode_insert-suspicious-rcu-dereference.patch
  0097-libceph-replace-connection-state-bits-with-states.patch
  0098-libceph-clean-up-con-flags.patch
  0099-libceph-clear-all-flags-on-con_close.patch
+0100-libceph-fix-handling-of-immediate-socket-connect-fai.patch
+0101-libceph-revoke-mon_client-messages-on-session-restar.patch
+0102-libceph-verify-state-after-retaking-con-lock-after-d.patch
+0103-libceph-avoid-dropping-con-mutex-before-fault.patch
+0104-libceph-change-ceph_con_in_msg_alloc-convention-to-b.patch
+0105-libceph-recheck-con-state-after-allocating-incoming-.patch
+0106-libceph-fix-crypto-key-null-deref-memory-leak.patch
+0107-libceph-delay-debugfs-initialization-until-we-learn-.patch
+0108-libceph-avoid-truncation-due-to-racing-banners.patch
+0109-libceph-only-kunmap-kmapped-pages.patch
+0110-rbd-reset-BACKOFF-if-unable-to-re-queue.patch
+0111-libceph-avoid-NULL-kref_put-when-osd-reset-races-wit.patch
+0112-ceph-Fix-oops-when-handling-mdsmap-that-decreases-ma.patch
+0113-libceph-check-for-invalid-mapping.patch
+0114-ceph-avoid-32-bit-page-index-overflow.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 21 Nov 2012 20:15:24 +0000 (12:15 -0800)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 21 Nov 2012 20:15:24 +0000 (12:15 -0800)
queue-3.4/0100-libceph-fix-handling-of-immediate-socket-connect-fai.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0101-libceph-revoke-mon_client-messages-on-session-restar.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0102-libceph-verify-state-after-retaking-con-lock-after-d.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0103-libceph-avoid-dropping-con-mutex-before-fault.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0104-libceph-change-ceph_con_in_msg_alloc-convention-to-b.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0105-libceph-recheck-con-state-after-allocating-incoming-.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0106-libceph-fix-crypto-key-null-deref-memory-leak.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0107-libceph-delay-debugfs-initialization-until-we-learn-.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0108-libceph-avoid-truncation-due-to-racing-banners.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0109-libceph-only-kunmap-kmapped-pages.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0110-rbd-reset-BACKOFF-if-unable-to-re-queue.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0111-libceph-avoid-NULL-kref_put-when-osd-reset-races-wit.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0112-ceph-Fix-oops-when-handling-mdsmap-that-decreases-ma.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0113-libceph-check-for-invalid-mapping.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/0114-ceph-avoid-32-bit-page-index-overflow.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/series		patch \| blob \| blame \| history