git.ipfire.org Git - thirdparty/linux.git/commitdiff
bpf: Handle in-place update for full LPM trie correctly
author: Hou Tao <houtao1@huawei.com>
Fri, 6 Dec 2024 11:06:17 +0000 (19:06 +0800)
committer: Alexei Starovoitov <ast@kernel.org>
Fri, 6 Dec 2024 17:14:26 +0000 (09:14 -0800)
When an LPM trie is full, in-place updates of existing elements
incorrectly return -ENOSPC.

Fix this by deferring the check of trie->n_entries. For new insertions,
n_entries must not exceed max_entries. However, in-place updates are
allowed even when the trie is full.

Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation")
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20241206110622.1161752-5-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
kernel/bpf/lpm_trie.c

index be5bf03895328c7eb524d2a4da841ba93d8adba3..df6cc0a1c9bf751683646099c10a591eb6aa2856 100644 (file)
@@ -310,6 +310,16 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
        return node;
 }
 
+static int trie_check_add_elem(struct lpm_trie *trie, u64 flags)
+{
+       if (flags == BPF_EXIST)
+               return -ENOENT;
+       if (trie->n_entries == trie->map.max_entries)
+               return -ENOSPC;
+       trie->n_entries++;
+       return 0;
+}
+
 /* Called from syscall or from eBPF program */
 static long trie_update_elem(struct bpf_map *map,
                             void *_key, void *value, u64 flags)
@@ -333,20 +343,12 @@ static long trie_update_elem(struct bpf_map *map,
        spin_lock_irqsave(&trie->lock, irq_flags);
 
        /* Allocate and fill a new node */
-
-       if (trie->n_entries == trie->map.max_entries) {
-               ret = -ENOSPC;
-               goto out;
-       }
-
        new_node = lpm_trie_node_alloc(trie, value);
        if (!new_node) {
                ret = -ENOMEM;
                goto out;
        }
 
-       trie->n_entries++;
-
        new_node->prefixlen = key->prefixlen;
        RCU_INIT_POINTER(new_node->child[0], NULL);
        RCU_INIT_POINTER(new_node->child[1], NULL);
@@ -375,10 +377,10 @@ static long trie_update_elem(struct bpf_map *map,
         * simply assign the @new_node to that slot and be done.
         */
        if (!node) {
-               if (flags == BPF_EXIST) {
-                       ret = -ENOENT;
+               ret = trie_check_add_elem(trie, flags);
+               if (ret)
                        goto out;
-               }
+
                rcu_assign_pointer(*slot, new_node);
                goto out;
        }
@@ -392,10 +394,10 @@ static long trie_update_elem(struct bpf_map *map,
                                ret = -EEXIST;
                                goto out;
                        }
-                       trie->n_entries--;
-               } else if (flags == BPF_EXIST) {
-                       ret = -ENOENT;
-                       goto out;
+               } else {
+                       ret = trie_check_add_elem(trie, flags);
+                       if (ret)
+                               goto out;
                }
 
                new_node->child[0] = node->child[0];
@@ -407,10 +409,9 @@ static long trie_update_elem(struct bpf_map *map,
                goto out;
        }
 
-       if (flags == BPF_EXIST) {
-               ret = -ENOENT;
+       ret = trie_check_add_elem(trie, flags);
+       if (ret)
                goto out;
-       }
 
        /* If the new node matches the prefix completely, it must be inserted
         * as an ancestor. Simply insert it between @node and *@slot.
@@ -424,6 +425,7 @@ static long trie_update_elem(struct bpf_map *map,
 
        im_node = lpm_trie_node_alloc(trie, NULL);
        if (!im_node) {
+               trie->n_entries--;
                ret = -ENOMEM;
                goto out;
        }
@@ -445,12 +447,8 @@ static long trie_update_elem(struct bpf_map *map,
        rcu_assign_pointer(*slot, im_node);
 
 out:
-       if (ret) {
-               if (new_node)
-                       trie->n_entries--;
+       if (ret)
                kfree(new_node);
-       }
-
        spin_unlock_irqrestore(&trie->lock, irq_flags);
        kfree_rcu(free_node, rcu);