From: Remi Gacogne Date: Thu, 2 Mar 2023 14:06:59 +0000 (+0100) Subject: dnsdist: Add comments to the XSK code X-Git-Tag: dnsdist-1.9.0-rc1^2~43 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b1dd62ee62d3c0da1815d9e1d5ec87d76354946e;p=thirdparty%2Fpdns.git dnsdist: Add comments to the XSK code --- diff --git a/pdns/xsk.cc b/pdns/xsk.cc index 778d3cf4ae..722d823ae2 100644 --- a/pdns/xsk.cc +++ b/pdns/xsk.cc @@ -19,6 +19,11 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include "config.h" + +#ifdef HAVE_XSK + #include "gettime.hh" #include "xsk.hh" @@ -48,7 +53,6 @@ #include #include -#ifdef HAVE_XSK #include #include extern "C" @@ -185,10 +189,12 @@ std::vector XskSocket::recv(uint32_t recvSizeMax, uint32_t* failed { uint32_t idx; std::vector res; + // how many descriptors to packets have been filled const auto recvSize = xsk_ring_cons__peek(&rx, recvSizeMax, &idx); if (recvSize <= 0) { return res; } + const auto baseAddr = reinterpret_cast(umem.bufBase); uint32_t count = 0; for (uint32_t i = 0; i < recvSize; i++) { @@ -202,10 +208,15 @@ std::vector XskSocket::recv(uint32_t recvSizeMax, uint32_t* failed res.push_back(std::move(ptr)); } } + + // this releases the descriptor, but not the packet (umem entries) + // which will only be made available again when pushed into the fill + // queue xsk_ring_cons__release(&rx, recvSize); if (failedCount) { *failedCount = count; } + return res; } void XskSocket::pickUpReadyPacket(std::vector& packets) @@ -752,7 +763,7 @@ void XskSocket::getMACFromIfName() auto fd = ::socket(AF_INET, SOCK_DGRAM, 0); strncpy(ifr.ifr_name, ifName.c_str(), ifName.length() + 1); if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) { - throw runtime_error("Error get MAC addr"); + throw runtime_error("Error getting MAC addr"); } memcpy(source, ifr.ifr_hwaddr.sa_data, sizeof(source)); close(fd); diff --git a/pdns/xsk.hh b/pdns/xsk.hh 
index 83c957dc32..4f9d5cc389 100644 --- a/pdns/xsk.hh +++ b/pdns/xsk.hh @@ -21,21 +21,14 @@ */ #pragma once -#include "iputils.hh" -#include "misc.hh" -#include "noinitvector.hh" -#include "lock.hh" +#ifdef HAVE_XSK #include #include #include #include -#include #include #include -#include -#include -#include #include #include #include @@ -45,9 +38,14 @@ #include #include #include +#include -#ifdef HAVE_XSK #include + +#include "iputils.hh" +#include "lock.hh" +#include "misc.hh" +#include "noinitvector.hh" #endif /* HAVE_XSK */ class XskPacket; @@ -74,7 +72,7 @@ class XskSocket { xsk_umem* umem{nullptr}; uint8_t* bufBase{nullptr}; - size_t size; + size_t size{0}; void umemInit(size_t memSize, xsk_ring_cons* cq, xsk_ring_prod* fq, xsk_umem_config* config); ~XskUmem(); XskUmem() = default; @@ -88,16 +86,25 @@ class XskSocket static constexpr size_t holdThreshold = 256; static constexpr size_t fillThreshold = 128; static constexpr size_t frameSize = 2048; + // number of entries (frames) in the umem const size_t frameNum; + // ID of the network queue const uint32_t queueId; + // responses that have been delayed std::priority_queue waitForDelay; const std::string ifName; const std::string poolName; + // AF_XDP socket then worker waker sockets vector fds; + // list of (indexes of) umem entries that can be reused vector uniqueEmptyFrameOffset; + // completion ring: queue where sent packets are stored by the kernel xsk_ring_cons cq; + // rx ring: queue where the incoming packets are stored, read by XskRouter xsk_ring_cons rx; + // fill ring: queue where umem entries available to be filled (put into rx) are stored xsk_ring_prod fq; + // tx ring: queue where outgoing packets are stored xsk_ring_prod tx; std::unique_ptr socket; XskUmem umem; @@ -114,18 +121,27 @@ class XskSocket [[nodiscard]] uint64_t frameOffset(const XskPacket& packet) const noexcept; int firstTimeout(); + // picks up as many available frames as possible from uniqueEmptyFrameOffset + // and put them into 
sharedEmptyFrameOffset + // then insert them into fq void fillFq(uint32_t fillSize = fillThreshold) noexcept; + // picks up entries that have been processed (sent) and pushes them into uniqueEmptyFrameOffset void recycle(size_t size) noexcept; void getMACFromIfName(); + // look at delayed packets, and send the ones that are ready void pickUpReadyPacket(std::vector& packets); public: + // list of free umem entries that can be reused std::shared_ptr>> sharedEmptyFrameOffset; XskSocket(size_t frameNum, const std::string& ifName, uint32_t queue_id, const std::string& xskMapPath, const std::string& poolName_); MACAddr source; [[nodiscard]] int xskFd() const noexcept; + // wait until one event has occurred int wait(int timeout); + // add as many packets as possible to the tx queue for sending void send(std::vector& packets); + // look at incoming packets in rx, return them if parsing succeeded std::vector recv(uint32_t recvSizeMax, uint32_t* failedCount); void addWorker(std::shared_ptr s, const ComboAddress& dest, bool isTCP); }; @@ -156,6 +172,7 @@ private: friend bool operator<(const XskPacketPtr& s1, const XskPacketPtr& s2) noexcept; constexpr static uint8_t DefaultTTL = 64; + // parse IP and UDP/TCP payloads bool parse(); void changeDirectAndUpdateChecksum() noexcept; @@ -208,7 +225,9 @@ public: uint8_t* umemBufBase; std::shared_ptr>> sharedEmptyFrameOffset; vector uniqueEmptyFrameOffset; + // queue of packets to be processed by this worker XskPacketRing cq; + // queue of packets processed by this worker (to be sent, or discarded) XskPacketRing sq; std::string poolName; size_t frameSize; @@ -219,13 +238,18 @@ public: static int createEventfd(); static void notify(int fd); static std::shared_ptr create(); + // notify worker that at least one packet is available for processing void notifyWorker() noexcept; + // notify the router that packets are ready to be sent void notifyXskSocket() noexcept; void waitForXskSocket() noexcept; void cleanWorkerNotification() 
noexcept; void cleanSocketNotification() noexcept; [[nodiscard]] uint64_t frameOffset(const XskPacket& s) const noexcept; + // reap empty umem entry from sharedEmptyFrameOffset into uniqueEmptyFrameOffset void fillUniqueEmptyOffset(); + // look for an empty umem entry in uniqueEmptyFrameOffset + // then sharedEmptyFrameOffset if needed void* getEmptyframe(); }; std::vector getPollFdsForWorker(XskWorker& info);