From dc22be505adb4b5820856f15b00d4a7795e059d9 Mon Sep 17 00:00:00 2001 From: Yogesh Singh Date: Sun, 17 Apr 2022 17:17:00 +0530 Subject: [PATCH] Short Description: Raise RLIMIT_MEMLOCK automatically when eBPF is requested. This PR adds changes to eBPF filter constructor which when invoked automatically raises the RLIMIT_MEMLOCK from 64k to 1024k. The hard limit for the user needs to be set in `/etc/security/limits.conf`. --- .github/actions/spell-check/expect.txt | 1 + pdns/bpf-filter.cc | 21 +++++++++++++++++++++ pdns/dnsdistdist/docs/advanced/ebpf.rst | 6 +++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/.github/actions/spell-check/expect.txt b/.github/actions/spell-check/expect.txt index d398b5cfb7..ab74213b18 100644 --- a/.github/actions/spell-check/expect.txt +++ b/.github/actions/spell-check/expect.txt @@ -988,6 +988,7 @@ MBOXFW mbytes Meerwald Mekking +memlock MEMLOCK Memusage menuselection diff --git a/pdns/bpf-filter.cc b/pdns/bpf-filter.cc index 6bdf84d5cb..7d5e018370 100644 --- a/pdns/bpf-filter.cc +++ b/pdns/bpf-filter.cc @@ -21,10 +21,12 @@ */ #include "bpf-filter.hh" #include "iputils.hh" +#include "dolog.hh" #ifdef HAVE_EBPF #include +#include #include #include "ext/libbpf/libbpf.h" @@ -354,6 +356,25 @@ BPFFilter::BPFFilter(std::unordered_map& configs, throw std::runtime_error("Unsupported eBPF map format, the current internal implemenation only supports the legacy format"); } + struct rlimit old_limit; + const rlim_t new_limit_size = 1024 * 1024; + + if (getrlimit(RLIMIT_MEMLOCK, &old_limit) != 0) { + throw std::runtime_error("Unable to get memory lock limit: " + stringerror()); + } + + /* Check if the current soft memlock limit is 64k */ + if (old_limit.rlim_cur < (64 * 1024)) { + struct rlimit new_limit; + new_limit.rlim_cur = new_limit_size; /* Increase soft limit to 1024k */ + new_limit.rlim_max = new_limit_size; /* Increase hard limit to 1024k */ + + if (setrlimit(RLIMIT_MEMLOCK, &new_limit) != 0) { + errlog("Unable to raise 
the maximum amount of locked memory for eBPF from %d to %d, consider raising RLIMIT_MEMLOCK or setting LimitMEMLOCK=infinity in the systemd unit: %s", old_limit.rlim_cur, new_limit.rlim_cur, stringerror()); + } + infolog("The current limit of locked memory (soft: %d, hard: %d) is too low for eBPF, trying to raise it to %d", old_limit.rlim_cur, old_limit.rlim_max, new_limit_size); + } + auto maps = d_maps.lock(); maps->d_v4 = BPFFilter::Map(configs["ipv4"], d_mapFormat); diff --git a/pdns/dnsdistdist/docs/advanced/ebpf.rst b/pdns/dnsdistdist/docs/advanced/ebpf.rst index 1c40bbda3b..6d8f9c2a20 100644 --- a/pdns/dnsdistdist/docs/advanced/ebpf.rst +++ b/pdns/dnsdistdist/docs/advanced/ebpf.rst @@ -85,7 +85,10 @@ Requirements In addition to the capabilities explained above, that feature might require an increase of the memory limit associated to a socket, via the sysctl setting ``net.core.optmem_max``. When attaching an eBPF program to a socket, the size of the program is checked against this limit, and the default value might not be enough. -Large map sizes might also require an increase of ``RLIMIT_MEMLOCK``, which can be done by adding ``LimitMEMLOCK=infinity`` in the systemd unit file. It can also be done manually for testing purposes, in a non-permanent way, by using ``ulimit -l``. +Large map sizes might also require an increase of ``RLIMIT_MEMLOCK``, which can be done by adding ``LimitMEMLOCK=limit`` in the systemd unit file, where limit is specified using byte as unit. It can also be done manually for testing purposes, in a non-permanent way, by using ``ulimit -l``. 
+ +To change the default hard limit on ``RLIMIT_MEMLOCK``, add the following line to ``/etc/security/limits.conf`` for the user, specifying the limit in kilobytes, for example: + > $USER hard memlock 1024 External program, maps and XDP filtering ---------------------------------------- @@ -110,3 +113,4 @@ The first, legacy format is still used because of the limitations of eBPF socket XDP programs are more powerful than eBPF socket filtering ones as they are not limited to accepting or denying a packet, but can immediately craft and send an answer. They are also executed a bit earlier in the kernel networking path so can provide better performance. A sample program using the maps populated by dnsdist in an external XDP program can be found in the `contrib/ directory of our git repository `__. That program supports answering with a TC=1 response instead of simply dropping the packet. + -- 2.47.2