--- /dev/null
+From 38ec4944b593fd90c5ef42aaaa53e66ae5769d04 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 13 Apr 2021 05:41:35 -0700
+Subject: gro: ensure frag0 meets IP header alignment
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 38ec4944b593fd90c5ef42aaaa53e66ae5769d04 upstream.
+
+After commit 0f6925b3e8da ("virtio_net: Do not pull payload in skb->head")
+Guenter Roeck reported one failure in his tests using sh architecture.
+
+After much debugging, we have been able to spot silent unaligned accesses
+in inet_gro_receive()
+
+The issue at hand is that upper networking stacks assume their header
+is word-aligned. Low level drivers are supposed to reserve NET_IP_ALIGN
+bytes before the Ethernet header to make that happen.
+
+This patch hardens skb_gro_reset_offset() to not allow frag0 fast-path
+if the fragment is not properly aligned.
+
+Some arches like x86, arm64 and powerpc do not care and define NET_IP_ALIGN
+as 0, this extra check will be a NOP for them.
+
+Note that if frag0 is not used, GRO will call pskb_may_pull()
+as many times as needed to pull network and transport headers.
+
+Fixes: 0f6925b3e8da ("virtio_net: Do not pull payload in skb->head")
+Fixes: 78a478d0efd9 ("gro: Inline skb_gro_header and cache frag0 virtual address")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Guenter Roeck <linux@roeck-us.net>
+Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Cc: "Michael S. Tsirkin" <mst@redhat.com>
+Cc: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Tested-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h | 9 +++++++++
+ net/core/dev.c | 3 ++-
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -2789,6 +2789,15 @@ static inline void skb_propagate_pfmemal
+ }
+
+ /**
++ * skb_frag_off() - Returns the offset of a skb fragment
++ * @frag: the paged fragment
++ */
++static inline unsigned int skb_frag_off(const skb_frag_t *frag)
++{
++ return frag->page_offset;
++}
++
++/**
+ * skb_frag_page - retrieve the page referred to by a paged fragment
+ * @frag: the paged fragment
+ *
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -5400,7 +5400,8 @@ static void skb_gro_reset_offset(struct
+
+ if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
+ pinfo->nr_frags &&
+- !PageHighMem(skb_frag_page(frag0))) {
++ !PageHighMem(skb_frag_page(frag0)) &&
++ (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
+ NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
+ NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
+ skb_frag_size(frag0),
--- /dev/null
+From 0f6925b3e8da0dbbb52447ca8a8b42b371aac7db Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 2 Apr 2021 06:26:02 -0700
+Subject: virtio_net: Do not pull payload in skb->head
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 0f6925b3e8da0dbbb52447ca8a8b42b371aac7db upstream.
+
+Xuan Zhuo reported that commit 3226b158e67c ("net: avoid 32 x truesize
+under-estimation for tiny skbs") brought a ~10% performance drop.
+
+The reason for the performance drop was that GRO was forced
+to chain sk_buff (using skb_shinfo(skb)->frag_list), which
+uses more memory but also cause packet consumers to go over
+a lot of overhead handling all the tiny skbs.
+
+It turns out that virtio_net page_to_skb() has a wrong strategy :
+It allocates skbs with GOOD_COPY_LEN (128) bytes in skb->head, then
+copies 128 bytes from the page, before feeding the packet to GRO stack.
+
+This was suboptimal before commit 3226b158e67c ("net: avoid 32 x truesize
+under-estimation for tiny skbs") because GRO was using 2 frags per MSS,
+meaning we were not packing MSS with 100% efficiency.
+
+Fix is to pull only the ethernet header in page_to_skb()
+
+Then, we change virtio_net_hdr_to_skb() to pull the missing
+headers, instead of assuming they were already pulled by callers.
+
+This fixes the performance regression, but could also allow virtio_net
+to accept packets with more than 128bytes of headers.
+
+Many thanks to Xuan Zhuo for his report, and his tests/help.
+
+Fixes: 3226b158e67c ("net: avoid 32 x truesize under-estimation for tiny skbs")
+Reported-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Link: https://www.spinics.net/lists/netdev/msg731397.html
+Co-Developed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: "Michael S. Tsirkin" <mst@redhat.com>
+Cc: Jason Wang <jasowang@redhat.com>
+Cc: virtualization@lists.linux-foundation.org
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 10 +++++++---
+ include/linux/virtio_net.h | 14 +++++++++-----
+ 2 files changed, 16 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -413,9 +413,13 @@ static struct sk_buff *page_to_skb(struc
+ offset += hdr_padded_len;
+ p += hdr_padded_len;
+
+- copy = len;
+- if (copy > skb_tailroom(skb))
+- copy = skb_tailroom(skb);
++ /* Copy all frame if it fits skb->head, otherwise
++ * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
++ */
++ if (len <= skb_tailroom(skb))
++ copy = len;
++ else
++ copy = ETH_HLEN + metasize;
+ skb_put_data(skb, p, copy);
+
+ if (metasize) {
+--- a/include/linux/virtio_net.h
++++ b/include/linux/virtio_net.h
+@@ -65,14 +65,18 @@ static inline int virtio_net_hdr_to_skb(
+ skb_reset_mac_header(skb);
+
+ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+- u16 start = __virtio16_to_cpu(little_endian, hdr->csum_start);
+- u16 off = __virtio16_to_cpu(little_endian, hdr->csum_offset);
++ u32 start = __virtio16_to_cpu(little_endian, hdr->csum_start);
++ u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset);
++ u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16));
++
++ if (!pskb_may_pull(skb, needed))
++ return -EINVAL;
+
+ if (!skb_partial_csum_set(skb, start, off))
+ return -EINVAL;
+
+ p_off = skb_transport_offset(skb) + thlen;
+- if (p_off > skb_headlen(skb))
++ if (!pskb_may_pull(skb, p_off))
+ return -EINVAL;
+ } else {
+ /* gso packets without NEEDS_CSUM do not set transport_offset.
+@@ -102,14 +106,14 @@ retry:
+ }
+
+ p_off = keys.control.thoff + thlen;
+- if (p_off > skb_headlen(skb) ||
++ if (!pskb_may_pull(skb, p_off) ||
+ keys.basic.ip_proto != ip_proto)
+ return -EINVAL;
+
+ skb_set_transport_header(skb, keys.control.thoff);
+ } else if (gso_type) {
+ p_off = thlen;
+- if (p_off > skb_headlen(skb))
++ if (!pskb_may_pull(skb, p_off))
+ return -EINVAL;
+ }
+ }