git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
xfrm: iptfs: add skb-fragment sharing code
author Christian Hopps <chopps@labn.net>
Thu, 14 Nov 2024 07:07:10 +0000 (02:07 -0500)
committer Steffen Klassert <steffen.klassert@secunet.com>
Thu, 5 Dec 2024 09:02:22 +0000 (10:02 +0100)
Avoid copying the inner packet data by sharing the skb data fragments
from the output packet skb into new inner packet skb.

Signed-off-by: Christian Hopps <chopps@labn.net>
Tested-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
net/xfrm/xfrm_iptfs.c

index 8538fb02ae8aa27ec11a435813d8123f2a092d61..1258158e57baae5a3fe88232022dd22047701f68 100644 (file)
@@ -81,6 +81,9 @@
 #define XFRM_IPTFS_MIN_L3HEADROOM 128
 #define XFRM_IPTFS_MIN_L2HEADROOM (L1_CACHE_BYTES > 64 ? 64 : 64 + 16)
 
+/* Min IP packet length (iplen) to try sharing outer iptfs skb data vs copying */
+#define IPTFS_PKT_SHARE_MIN 129
+
 #define NSECS_IN_USEC 1000
 
 #define IPTFS_HRTIMER_MODE HRTIMER_MODE_REL_SOFT
@@ -234,10 +237,254 @@ static void iptfs_skb_head_to_frag(const struct sk_buff *skb, skb_frag_t *frag)
        skb_frag_fill_page_desc(frag, page, skb->data - addr, skb_headlen(skb));
 }
 
+/**
+ * struct iptfs_skb_frag_walk - used to track a walk through fragments
+ * @fragi: index into @frags of the fragment the walk is positioned at
+ * @past: length of data in fragments before @fragi
+ * @total: length of data in all fragments of the walk (after trimming)
+ * @nr_frags: number of valid entries in @frags
+ * @initial_offset: the value passed in to iptfs_skb_prepare_frag_walk()
+ * @frags: copy of the page fragments inc. room for the head (if head_frag)
+ * @pp_recycle: copy of skb->pp_recycle
+ */
+struct iptfs_skb_frag_walk {
+       u32 fragi;
+       u32 past;
+       u32 total;
+       u32 nr_frags;
+       u32 initial_offset;
+       skb_frag_t frags[MAX_SKB_FRAGS + 1];
+       bool pp_recycle;
+};
+
+/**
+ * iptfs_skb_prepare_frag_walk() - initialize a frag walk over an skb.
+ * @skb: the skb to walk.
+ * @initial_offset: start the walk @initial_offset into the skb.
+ * @walk: the walk to initialize
+ *
+ * Data before @initial_offset is left out of the walk. Future calls to
+ * iptfs_skb_add_frags() expect @offset to be at least @initial_offset.
+ */
+static void iptfs_skb_prepare_frag_walk(struct sk_buff *skb, u32 initial_offset,
+                                       struct iptfs_skb_frag_walk *walk)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       skb_frag_t *frag, *from;
+       u32 i;
+
+       walk->initial_offset = initial_offset;
+       walk->fragi = 0;
+       walk->past = 0;
+       walk->total = 0;
+       walk->nr_frags = 0;
+       walk->pp_recycle = skb->pp_recycle;
+
+       if (skb->head_frag) { /* the head can be represented as a frag */
+               if (initial_offset >= skb_headlen(skb)) { /* start is past the head */
+                       initial_offset -= skb_headlen(skb);
+               } else {
+                       frag = &walk->frags[walk->nr_frags++];
+                       iptfs_skb_head_to_frag(skb, frag);
+                       frag->offset += initial_offset; /* trim leading bytes */
+                       frag->len -= initial_offset;
+                       walk->total += frag->len;
+                       initial_offset = 0;
+               }
+       } else {
+               initial_offset -= skb_headlen(skb); /* NOTE(review): u32 wraps if initial_offset < headlen */
+       }
+
+       for (i = 0; i < shinfo->nr_frags; i++) {
+               from = &shinfo->frags[i];
+               if (initial_offset >= from->len) { /* frag is wholly before the start */
+                       initial_offset -= from->len;
+                       continue;
+               }
+               frag = &walk->frags[walk->nr_frags++];
+               *frag = *from;
+               if (initial_offset) { /* first kept frag: trim leading bytes */
+                       frag->offset += initial_offset;
+                       frag->len -= initial_offset;
+                       initial_offset = 0;
+               }
+               walk->total += frag->len;
+       }
+}
+
+static u32 iptfs_skb_reset_frag_walk(struct iptfs_skb_frag_walk *walk,
+                                    u32 offset)
+{
+       /* @offset counts from the skb start; rebase it onto the walk's data */
+       offset -= walk->initial_offset;
+
+       /* Seek fragi/past to the frag holding offset; NOTE(review): no nr_frags bound */
+       while (offset < walk->past) {
+               walk->past -= walk->frags[--walk->fragi].len;
+               if (offset >= walk->past)
+                       break;
+       }
+       while (offset >= walk->past + walk->frags[walk->fragi].len)
+               walk->past += walk->frags[walk->fragi++].len;
+
+       /* Return offset relative to the start of the current frag */
+       offset -= walk->past;
+       return offset;
+}
+
+/**
+ * iptfs_skb_can_add_frags() - check if ok to add frags from walk to skb
+ * @skb: skb to check for adding frags to
+ * @walk: the walk that will be used as source for frags; it is repositioned.
+ * @offset: offset from beginning of original skb to start from.
+ * @len: amount of data to add frag references to in @skb.
+ *
+ * Return: true if @skb is compatible with @walk and has enough free frag slots.
+ */
+static bool iptfs_skb_can_add_frags(const struct sk_buff *skb,
+                                   struct iptfs_skb_frag_walk *walk,
+                                   u32 offset, u32 len)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       u32 fragi, nr_frags, fraglen;
+
+       if (skb_has_frag_list(skb) || skb->pp_recycle != walk->pp_recycle)
+               return false;
+
+       /* Seek the walk to @offset; offset becomes relative to that frag */
+       offset = iptfs_skb_reset_frag_walk(walk, offset);
+
+       /* Dry run: count the frag slots @len would use, without touching @skb */
+       fragi = walk->fragi;
+       nr_frags = shinfo->nr_frags;
+       while (len && fragi < walk->nr_frags) {
+               skb_frag_t *frag = &walk->frags[fragi];
+
+               fraglen = frag->len;
+               if (offset) {
+                       fraglen -= offset;
+                       offset = 0;
+               }
+               if (++nr_frags > MAX_SKB_FRAGS)
+                       return false;
+               if (len <= fraglen)
+                       return true;
+               len -= fraglen;
+               fragi++;
+       }
+       /* Walk ran out before @len did; whatever is there will still fit. */
+       return true;
+}
+
+/**
+ * iptfs_skb_add_frags() - add a range of fragment references into an skb
+ * @skb: skb to add references into
+ * @walk: the walk to add referenced fragments from.
+ * @offset: offset from beginning of original skb to start from.
+ * @len: amount of data to add frag references to in @skb.
+ *
+ * iptfs_skb_can_add_frags() should be called before this function to verify
+ * that the destination @skb is compatible with the walk and has space in the
+ * array for the to be added frag references.
+ *
+ * Return: The number of bytes not added to @skb b/c we reached the end of the
+ * walk before adding all of @len (0 when all of @len was added).
+ */
+static int iptfs_skb_add_frags(struct sk_buff *skb,
+                              struct iptfs_skb_frag_walk *walk, u32 offset,
+                              u32 len)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       u32 fraglen;
+
+       if (!walk->nr_frags || offset >= walk->total + walk->initial_offset)
+               return len; /* empty walk or @offset beyond its data: nothing added */
+
+       /* Seek the walk to @offset; offset becomes relative to that frag */
+       offset = iptfs_skb_reset_frag_walk(walk, offset);
+
+       while (len && walk->fragi < walk->nr_frags) {
+               skb_frag_t *frag = &walk->frags[walk->fragi];
+               skb_frag_t *tofrag = &shinfo->frags[shinfo->nr_frags];
+
+               *tofrag = *frag;
+               if (offset) {
+                       tofrag->offset += offset;
+                       tofrag->len -= offset;
+                       offset = 0;
+               }
+               __skb_frag_ref(tofrag);
+               shinfo->nr_frags++;
+
+               /* @len ends inside this frag: truncate the copy and finish */
+               fraglen = tofrag->len;
+               if (len < fraglen) {
+                       tofrag->len = len;
+                       skb->len += len;
+                       skb->data_len += len;
+                       return 0;
+               }
+               /* frag fully consumed; advance to next source fragment */
+               len -= fraglen;                 /* careful, use dst frag len */
+               skb->len += fraglen;            /* careful, "   "    "    "  */
+               skb->data_len += fraglen;       /* careful, "   "    "    "  */
+               walk->past += frag->len;        /* careful, use src frag len */
+               walk->fragi++;
+       }
+       return len;
+}
+
 /* ================================== */
 /* IPTFS Receiving (egress) Functions */
 /* ================================== */
 
+/**
+ * iptfs_pskb_add_frags() - Create and add frags into a new sk_buff.
+ * @tpl: template to create new skb from.
+ * @walk: The source for fragments to add.
+ * @off: The offset into @walk to add frags from, also used with @st and
+ *       @copy_len.
+ * @len: The length of data to place in the new skb, copied plus shared.
+ *       This must be >= @copy_len (@len - @copy_len is computed unsigned).
+ * @st: The sequence state to copy from into the new head skb.
+ * @copy_len: Copy @copy_len bytes from @st at offset @off into the new skb
+ *            linear space.
+ *
+ * Create a new sk_buff `skb` using the template @tpl. Copy @copy_len bytes from
+ * @st into the new skb linear space, and then add shared fragments from the
+ * frag walk for the remaining @len of data (i.e., @len - @copy_len bytes).
+ *
+ * Return: The newly allocated sk_buff `skb` or NULL if an error occurs.
+ */
+static struct sk_buff *
+iptfs_pskb_add_frags(struct sk_buff *tpl, struct iptfs_skb_frag_walk *walk,
+                    u32 off, u32 len, struct skb_seq_state *st, u32 copy_len)
+{
+       struct sk_buff *skb;
+
+       skb = iptfs_alloc_skb(tpl, copy_len, false);
+       if (!skb)
+               return NULL;
+
+       /* Not normally expected: the new skb cannot take frags from this walk */
+       if (!iptfs_skb_can_add_frags(skb, walk, off + copy_len,
+                                    len - copy_len)) {
+               kfree_skb(skb);
+               return NULL;
+       }
+
+       if (copy_len &&
+           skb_copy_seq_read(st, off, skb_put(skb, copy_len), copy_len)) {
+               XFRM_INC_STATS(dev_net(st->root_skb->dev),
+                              LINUX_MIB_XFRMINERROR);
+               kfree_skb(skb);
+               return NULL;
+       }
+
+       iptfs_skb_add_frags(skb, walk, off + copy_len, len - copy_len); /* NOTE(review): shortfall return value is ignored */
+       return skb;
+}
+
 /**
  * iptfs_pskb_extract_seq() - Create and load data into a new sk_buff.
  * @skblen: the total data size for `skb`.
@@ -423,6 +670,8 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
                              struct skb_seq_state *st, struct sk_buff *skb,
                              u32 data, u32 blkoff, struct list_head *list)
 {
+       struct iptfs_skb_frag_walk _fragwalk;
+       struct iptfs_skb_frag_walk *fragwalk = NULL;
        struct sk_buff *newskb = xtfs->ra_newskb;
        u32 remaining = skb->len - data;
        u32 runtlen = xtfs->ra_runtlen;
@@ -567,10 +816,26 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
        fraglen = min(blkoff, remaining);
        copylen = min(fraglen, ipremain);
 
-       /* copy fragment data into newskb */
-       if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), copylen)) {
-               XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMINBUFFERERROR);
-               goto abandon;
+       /* If we may have the opportunity to share prepare a fragwalk. */
+       if (!skb_has_frag_list(skb) && !skb_has_frag_list(newskb) &&
+           (skb->head_frag || skb->len == skb->data_len) &&
+           skb->pp_recycle == newskb->pp_recycle) {
+               fragwalk = &_fragwalk;
+               iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
+       }
+
+       /* Try share then copy. */
+       if (fragwalk &&
+           iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) {
+               iptfs_skb_add_frags(newskb, fragwalk, data, copylen);
+       } else {
+               /* copy fragment data into newskb */
+               if (skb_copy_seq_read(st, data, skb_put(newskb, copylen),
+                                     copylen)) {
+                       XFRM_INC_STATS(xs_net(xtfs->x),
+                                      LINUX_MIB_XFRMINBUFFERERROR);
+                       goto abandon;
+               }
        }
 
        if (copylen < ipremain) {
@@ -601,6 +866,8 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
                                    struct list_head *sublist)
 {
        u8 hbytes[sizeof(struct ipv6hdr)];
+       struct iptfs_skb_frag_walk _fragwalk;
+       struct iptfs_skb_frag_walk *fragwalk = NULL;
        struct sk_buff *defer, *first_skb, *next, *skb;
        const unsigned char *old_mac;
        struct xfrm_iptfs_data *xtfs;
@@ -694,6 +961,7 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
                } else {
                        first_skb = skb;
                        first_iplen = iplen;
+                       fragwalk = NULL;
 
                        /* We are going to skip over `data` bytes to reach the
                         * start of the IP header of `iphlen` len for `iplen`
@@ -745,6 +1013,13 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
                                /* all pointers could be changed now reset walk */
                                skb_abort_seq_read(skbseq);
                                skb_prepare_seq_read(skb, data, tail, skbseq);
+                       } else if (skb->head_frag &&
+                                  /* We have the IP header right now */
+                                  remaining >= iphlen) {
+                               fragwalk = &_fragwalk;
+                               iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
+                               defer = skb;
+                               skb = NULL;
                        } else {
                                /* We couldn't reuse the input skb so allocate a
                                 * new one.
@@ -760,8 +1035,17 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
 
                capturelen = min(iplen, remaining);
                if (!skb) {
-                       skb = iptfs_pskb_extract_seq(iplen, skbseq, data,
-                                                    capturelen);
+                       if (!fragwalk ||
+                           /* Large enough to be worth sharing */
+                           iplen < IPTFS_PKT_SHARE_MIN ||
+                           /* Have IP header + some data to share. */
+                           capturelen <= iphlen ||
+                           /* Try creating skb and adding frags */
+                           !(skb = iptfs_pskb_add_frags(first_skb, fragwalk,
+                                                        data, capturelen,
+                                                        skbseq, iphlen))) {
+                               skb = iptfs_pskb_extract_seq(iplen, skbseq, data, capturelen);
+                       }
                        if (!skb) {
                                /* skip to next packet or done */
                                data += capturelen;