git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
ceph: try to allocate enough memory for reserved caps
author Zhi Zhang <zhang.david2011@gmail.com>
Wed, 24 Jan 2018 13:24:33 +0000 (21:24 +0800)
committer Ilya Dryomov <idryomov@gmail.com>
Mon, 29 Jan 2018 17:36:12 +0000 (18:36 +0100)
ceph_reserve_caps() may fail to reserve enough caps under high memory
pressure, but it still records the number of caps that were expected
to be reserved. When caps are later taken from the reservation, a
crash can occur because of the number mismatch.

Now, when allocating memory for the caps to be reserved fails, we try
to trim more caps and then retry the allocation. If the allocation
still fails, return -ENOMEM.

Signed-off-by: Zhi Zhang <zhang.david2011@gmail.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/caps.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/super.h

index 1726ddcf5e34fcc6640322152388d6a654d9b698..8e66fb0e743d1facddc9aa8192f7222256e7b312 100644 (file)
@@ -154,13 +154,19 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
        spin_unlock(&mdsc->caps_list_lock);
 }
 
-void ceph_reserve_caps(struct ceph_mds_client *mdsc,
+/*
+ * Called under mdsc->mutex.
+ */
+int ceph_reserve_caps(struct ceph_mds_client *mdsc,
                      struct ceph_cap_reservation *ctx, int need)
 {
-       int i;
+       int i, j;
        struct ceph_cap *cap;
        int have;
        int alloc = 0;
+       int max_caps;
+       bool trimmed = false;
+       struct ceph_mds_session *s;
        LIST_HEAD(newcaps);
 
        dout("reserve caps ctx=%p need=%d\n", ctx, need);
@@ -179,16 +185,37 @@ void ceph_reserve_caps(struct ceph_mds_client *mdsc,
        spin_unlock(&mdsc->caps_list_lock);
 
        for (i = have; i < need; i++) {
+retry:
                cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
-               if (!cap)
-                       break;
+               if (!cap) {
+                       if (!trimmed) {
+                               for (j = 0; j < mdsc->max_sessions; j++) {
+                                       s = __ceph_lookup_mds_session(mdsc, j);
+                                       if (!s)
+                                               continue;
+                                       mutex_unlock(&mdsc->mutex);
+
+                                       mutex_lock(&s->s_mutex);
+                                       max_caps = s->s_nr_caps - (need - i);
+                                       ceph_trim_caps(mdsc, s, max_caps);
+                                       mutex_unlock(&s->s_mutex);
+
+                                       ceph_put_mds_session(s);
+                                       mutex_lock(&mdsc->mutex);
+                               }
+                               trimmed = true;
+                               goto retry;
+                       } else {
+                               pr_warn("reserve caps ctx=%p ENOMEM "
+                                       "need=%d got=%d\n",
+                                       ctx, need, have + alloc);
+                               goto out_nomem;
+                       }
+               }
                list_add(&cap->caps_item, &newcaps);
                alloc++;
        }
-       /* we didn't manage to reserve as much as we needed */
-       if (have + alloc != need)
-               pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
-                       ctx, need, have + alloc);
+       BUG_ON(have + alloc != need);
 
        spin_lock(&mdsc->caps_list_lock);
        mdsc->caps_total_count += alloc;
@@ -204,6 +231,24 @@ void ceph_reserve_caps(struct ceph_mds_client *mdsc,
        dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
             ctx, mdsc->caps_total_count, mdsc->caps_use_count,
             mdsc->caps_reserve_count, mdsc->caps_avail_count);
+       return 0;
+
+out_nomem:
+       while (!list_empty(&newcaps)) {
+               cap = list_first_entry(&newcaps,
+                               struct ceph_cap, caps_item);
+               list_del(&cap->caps_item);
+               kmem_cache_free(ceph_cap_cachep, cap);
+       }
+
+       spin_lock(&mdsc->caps_list_lock);
+       mdsc->caps_avail_count += have;
+       mdsc->caps_reserve_count -= have;
+       BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+                                        mdsc->caps_reserve_count +
+                                        mdsc->caps_avail_count);
+       spin_unlock(&mdsc->caps_list_lock);
+       return -ENOMEM;
 }
 
 int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
index 251dc44d84a04fc4504e713dfb166cb4cb5c08e8..2e8f90f96540291ab412703133d2a4278edbac71 100644 (file)
@@ -604,10 +604,20 @@ static void __register_request(struct ceph_mds_client *mdsc,
                               struct ceph_mds_request *req,
                               struct inode *dir)
 {
+       int ret = 0;
+
        req->r_tid = ++mdsc->last_tid;
-       if (req->r_num_caps)
-               ceph_reserve_caps(mdsc, &req->r_caps_reservation,
-                                 req->r_num_caps);
+       if (req->r_num_caps) {
+               ret = ceph_reserve_caps(mdsc, &req->r_caps_reservation,
+                                       req->r_num_caps);
+               if (ret < 0) {
+                       pr_err("__register_request %p "
+                              "failed to reserve caps: %d\n", req, ret);
+                       /* set req->r_err to fail early from __do_request */
+                       req->r_err = ret;
+                       return;
+               }
+       }
        dout("__register_request %p tid %lld\n", req, req->r_tid);
        ceph_mdsc_get_request(req);
        insert_request(&mdsc->request_tree, req);
@@ -1545,9 +1555,9 @@ out:
 /*
  * Trim session cap count down to some max number.
  */
-static int trim_caps(struct ceph_mds_client *mdsc,
-                    struct ceph_mds_session *session,
-                    int max_caps)
+int ceph_trim_caps(struct ceph_mds_client *mdsc,
+                  struct ceph_mds_session *session,
+                  int max_caps)
 {
        int trim_caps = session->s_nr_caps - max_caps;
 
@@ -2776,7 +2786,7 @@ static void handle_session(struct ceph_mds_session *session,
                break;
 
        case CEPH_SESSION_RECALL_STATE:
-               trim_caps(mdsc, session, le32_to_cpu(h->max_caps));
+               ceph_trim_caps(mdsc, session, le32_to_cpu(h->max_caps));
                break;
 
        case CEPH_SESSION_FLUSHMSG:
index 837ac4b087a0babb0f202ae66e79688d8f57822c..71e3b783ee6fae5a64ae0a4a3eb1471c9fd9ed44 100644 (file)
@@ -444,4 +444,7 @@ ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target);
 extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
                                          struct ceph_mds_session *session);
 
+extern int ceph_trim_caps(struct ceph_mds_client *mdsc,
+                         struct ceph_mds_session *session,
+                         int max_caps);
 #endif
index 601100da738f865c90f37dda9f628c01c2e106a7..21b2e5b004eb72ba10057df37c6907766aed5993 100644 (file)
@@ -649,7 +649,7 @@ extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
 extern void ceph_caps_init(struct ceph_mds_client *mdsc);
 extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
 extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
-extern void ceph_reserve_caps(struct ceph_mds_client *mdsc,
+extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
                             struct ceph_cap_reservation *ctx, int need);
 extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
                               struct ceph_cap_reservation *ctx);