Fixes for 6.1

author Sasha Levin <sashal@kernel.org>

Sat, 1 Mar 2025 14:19:53 +0000 (09:19 -0500)

committer Sasha Levin <sashal@kernel.org>

Sat, 1 Mar 2025 14:19:53 +0000 (09:19 -0500)
author Sasha Levin <sashal@kernel.org>
Sat, 1 Mar 2025 14:19:53 +0000 (09:19 -0500)
committer Sasha Levin <sashal@kernel.org>
Sat, 1 Mar 2025 14:19:53 +0000 (09:19 -0500)
diff --git a/queue-6.1/afs-fix-the-server_list-to-unuse-a-displaced-server-.patch b/queue-6.1/afs-fix-the-server_list-to-unuse-a-displaced-server-.patch

new file mode 100644 (file)

index 0000000..f6df08c
--- /dev/null
+++ b/queue-6.1/afs-fix-the-server_list-to-unuse-a-displaced-server-.patch
@@ -0,0 +1,59 @@
+From 1ba4f8c739fc4511f1da171793ba63861980e61b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Feb 2025 19:22:47 +0000
+Subject: afs: Fix the server_list to unuse a displaced server rather than
+ putting it
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit add117e48df4788a86a21bd0515833c0a6db1ad1 ]
+
+When allocating and building an afs_server_list struct object from a VLDB
+record, we look up each server address to get the server record for it -
+but a server may have more than one entry in the record and we discard the
+duplicate pointers.  Currently, however, when we discard, we only put a
+server record, not unuse it - but the lookup got us an active-user count.
+
+The active-user count on an afs_server_list object determines its lifetime
+whereas the refcount keeps the memory backing it around.  Failing to reduce
+the active-user counter prevents the record from being cleaned up and can
+lead to multiple copies being seen - and pointing to deleted afs_cell
+objects and other such things.
+
+Fix this by switching the incorrect 'put' to an 'unuse' instead.
+
+Without this, occasionally, a dead server record can be seen in
+/proc/net/afs/servers and list corruption may be observed:
+
+    list_del corruption. prev->next should be ffff888102423e40, but was 0000000000000000. (prev=ffff88810140cd38)
+
+Fixes: 977e5f8ed0ab ("afs: Split the usage count on struct afs_server")
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: Marc Dionne <marc.dionne@auristor.com>
+cc: Simon Horman <horms@kernel.org>
+cc: linux-afs@lists.infradead.org
+Link: https://patch.msgid.link/20250218192250.296870-5-dhowells@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/afs/server_list.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
+index 4d6369477f54e..89c75d934f79e 100644
+--- a/fs/afs/server_list.c
++++ b/fs/afs/server_list.c
+@@ -67,8 +67,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume,
+                               break;
+               if (j < slist->nr_servers) {
+                       if (slist->servers[j].server == server) {
+-                              afs_put_server(volume->cell->net, server,
+-                                             afs_server_trace_put_slist_isort);
++                              afs_unuse_server(volume->cell->net, server,
++                                               afs_server_trace_put_slist_isort);
+                               continue;
+                       }
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.1/afs-make-it-possible-to-find-the-volumes-that-are-us.patch b/queue-6.1/afs-make-it-possible-to-find-the-volumes-that-are-us.patch

new file mode 100644 (file)

index 0000000..a007a80
--- /dev/null
+++ b/queue-6.1/afs-make-it-possible-to-find-the-volumes-that-are-us.patch
@@ -0,0 +1,417 @@
+From 61857ffa217b971dad62722eddd225e93da7c12f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Nov 2023 16:08:43 +0000
+Subject: afs: Make it possible to find the volumes that are using a server
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit ca0e79a46097d54e4af46c67c852479d97af35bb ]
+
+Make it possible to find the afs_volume structs that are using an
+afs_server struct to aid in breaking volume callbacks.
+
+The way this is done is that each afs_volume already has an array of
+afs_server_entry records that point to the servers where that volume might
+be found.  An afs_volume backpointer and a list node is added to each entry
+and each entry is then added to an RCU-traversable list on the afs_server
+to which it points.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: Marc Dionne <marc.dionne@auristor.com>
+cc: linux-afs@lists.infradead.org
+Stable-dep-of: add117e48df4 ("afs: Fix the server_list to unuse a displaced server rather than putting it")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/afs/cell.c        |   1 +
+ fs/afs/internal.h    |  23 +++++----
+ fs/afs/server.c      |   1 +
+ fs/afs/server_list.c | 112 +++++++++++++++++++++++++++++++++++++++----
+ fs/afs/vl_alias.c    |   2 +-
+ fs/afs/volume.c      |  36 ++++++++------
+ 6 files changed, 143 insertions(+), 32 deletions(-)
+
+diff --git a/fs/afs/cell.c b/fs/afs/cell.c
+index 926cb1188eba6..7c0dce8eecadd 100644
+--- a/fs/afs/cell.c
++++ b/fs/afs/cell.c
+@@ -161,6 +161,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
+       refcount_set(&cell->ref, 1);
+       atomic_set(&cell->active, 0);
+       INIT_WORK(&cell->manager, afs_manage_cell_work);
++      spin_lock_init(&cell->vs_lock);
+       cell->volumes = RB_ROOT;
+       INIT_HLIST_HEAD(&cell->proc_volumes);
+       seqlock_init(&cell->volume_lock);
+diff --git a/fs/afs/internal.h b/fs/afs/internal.h
+index 097d5a5f07b1a..fd4310272ccc1 100644
+--- a/fs/afs/internal.h
++++ b/fs/afs/internal.h
+@@ -378,6 +378,7 @@ struct afs_cell {
+       unsigned int            debug_id;
+ 
+       /* The volumes belonging to this cell */
++      spinlock_t              vs_lock;        /* Lock for server->volumes */
+       struct rb_root          volumes;        /* Tree of volumes on this server */
+       struct hlist_head       proc_volumes;   /* procfs volume list */
+       seqlock_t               volume_lock;    /* For volumes */
+@@ -501,6 +502,7 @@ struct afs_server {
+       struct hlist_node       addr4_link;     /* Link in net->fs_addresses4 */
+       struct hlist_node       addr6_link;     /* Link in net->fs_addresses6 */
+       struct hlist_node       proc_link;      /* Link in net->fs_proc */
++      struct list_head        volumes;        /* RCU list of afs_server_entry objects */
+       struct work_struct      initcb_work;    /* Work for CB.InitCallBackState* */
+       struct afs_server       *gc_next;       /* Next server in manager's list */
+       time64_t                unuse_time;     /* Time at which last unused */
+@@ -549,12 +551,14 @@ struct afs_server {
+  */
+ struct afs_server_entry {
+       struct afs_server       *server;
++      struct afs_volume       *volume;
++      struct list_head        slink;          /* Link in server->volumes */
+ };
+ 
+ struct afs_server_list {
+       struct rcu_head         rcu;
+-      afs_volid_t             vids[AFS_MAXTYPES]; /* Volume IDs */
+       refcount_t              usage;
++      bool                    attached;       /* T if attached to servers */
+       unsigned char           nr_servers;
+       unsigned char           preferred;      /* Preferred server */
+       unsigned short          vnovol_mask;    /* Servers to be skipped due to VNOVOL */
+@@ -567,10 +571,9 @@ struct afs_server_list {
+  * Live AFS volume management.
+  */
+ struct afs_volume {
+-      union {
+-              struct rcu_head rcu;
+-              afs_volid_t     vid;            /* volume ID */
+-      };
++      struct rcu_head rcu;
++      afs_volid_t             vid;            /* The volume ID of this volume */
++      afs_volid_t             vids[AFS_MAXTYPES]; /* All associated volume IDs */
+       refcount_t              ref;
+       time64_t                update_at;      /* Time at which to next update */
+       struct afs_cell         *cell;          /* Cell to which belongs (pins ref) */
+@@ -1450,10 +1453,14 @@ static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list
+ }
+ 
+ extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *);
+-extern struct afs_server_list *afs_alloc_server_list(struct afs_cell *, struct key *,
+-                                                   struct afs_vldb_entry *,
+-                                                   u8);
++struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume,
++                                            struct key *key,
++                                            struct afs_vldb_entry *vldb);
+ extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *);
++void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist);
++void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist,
++                                  struct afs_server_list *old);
++void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server_list *slist);
+ 
+ /*
+  * super.c
+diff --git a/fs/afs/server.c b/fs/afs/server.c
+index 0bd2f5ba6900c..87381c2ffe374 100644
+--- a/fs/afs/server.c
++++ b/fs/afs/server.c
+@@ -236,6 +236,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
+       server->addr_version = alist->version;
+       server->uuid = *uuid;
+       rwlock_init(&server->fs_lock);
++      INIT_LIST_HEAD(&server->volumes);
+       INIT_WORK(&server->initcb_work, afs_server_init_callback_work);
+       init_waitqueue_head(&server->probe_wq);
+       INIT_LIST_HEAD(&server->probe_link);
+diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
+index b59896b1de0af..4d6369477f54e 100644
+--- a/fs/afs/server_list.c
++++ b/fs/afs/server_list.c
+@@ -24,13 +24,13 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
+ /*
+  * Build a server list from a VLDB record.
+  */
+-struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
++struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume,
+                                             struct key *key,
+-                                            struct afs_vldb_entry *vldb,
+-                                            u8 type_mask)
++                                            struct afs_vldb_entry *vldb)
+ {
+       struct afs_server_list *slist;
+       struct afs_server *server;
++      unsigned int type_mask = 1 << volume->type;
+       int ret = -ENOMEM, nr_servers = 0, i, j;
+ 
+       for (i = 0; i < vldb->nr_servers; i++)
+@@ -44,15 +44,12 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
+       refcount_set(&slist->usage, 1);
+       rwlock_init(&slist->lock);
+ 
+-      for (i = 0; i < AFS_MAXTYPES; i++)
+-              slist->vids[i] = vldb->vid[i];
+-
+       /* Make sure a records exists for each server in the list. */
+       for (i = 0; i < vldb->nr_servers; i++) {
+               if (!(vldb->fs_mask[i] & type_mask))
+                       continue;
+ 
+-              server = afs_lookup_server(cell, key, &vldb->fs_server[i],
++              server = afs_lookup_server(volume->cell, key, &vldb->fs_server[i],
+                                          vldb->addr_version[i]);
+               if (IS_ERR(server)) {
+                       ret = PTR_ERR(server);
+@@ -70,7 +67,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
+                               break;
+               if (j < slist->nr_servers) {
+                       if (slist->servers[j].server == server) {
+-                              afs_put_server(cell->net, server,
++                              afs_put_server(volume->cell->net, server,
+                                              afs_server_trace_put_slist_isort);
+                               continue;
+                       }
+@@ -81,6 +78,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
+               }
+ 
+               slist->servers[j].server = server;
++              slist->servers[j].volume = volume;
+               slist->nr_servers++;
+       }
+ 
+@@ -92,7 +90,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
+       return slist;
+ 
+ error_2:
+-      afs_put_serverlist(cell->net, slist);
++      afs_put_serverlist(volume->cell->net, slist);
+ error:
+       return ERR_PTR(ret);
+ }
+@@ -127,3 +125,99 @@ bool afs_annotate_server_list(struct afs_server_list *new,
+ 
+       return true;
+ }
++
++/*
++ * Attach a volume to the servers it is going to use.
++ */
++void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist)
++{
++      struct afs_server_entry *se, *pe;
++      struct afs_server *server;
++      struct list_head *p;
++      unsigned int i;
++
++      spin_lock(&volume->cell->vs_lock);
++
++      for (i = 0; i < slist->nr_servers; i++) {
++              se = &slist->servers[i];
++              server = se->server;
++
++              list_for_each(p, &server->volumes) {
++                      pe = list_entry(p, struct afs_server_entry, slink);
++                      if (volume->vid <= pe->volume->vid)
++                              break;
++              }
++              list_add_tail_rcu(&se->slink, p);
++      }
++
++      slist->attached = true;
++      spin_unlock(&volume->cell->vs_lock);
++}
++
++/*
++ * Reattach a volume to the servers it is going to use when server list is
++ * replaced.  We try to switch the attachment points to avoid rewalking the
++ * lists.
++ */
++void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *new,
++                                  struct afs_server_list *old)
++{
++      unsigned int n = 0, o = 0;
++
++      spin_lock(&volume->cell->vs_lock);
++
++      while (n < new->nr_servers || o < old->nr_servers) {
++              struct afs_server_entry *pn = n < new->nr_servers ? &new->servers[n] : NULL;
++              struct afs_server_entry *po = o < old->nr_servers ? &old->servers[o] : NULL;
++              struct afs_server_entry *s;
++              struct list_head *p;
++              int diff;
++
++              if (pn && po && pn->server == po->server) {
++                      list_replace_rcu(&po->slink, &pn->slink);
++                      n++;
++                      o++;
++                      continue;
++              }
++
++              if (pn && po)
++                      diff = memcmp(&pn->server->uuid, &po->server->uuid,
++                                    sizeof(pn->server->uuid));
++              else
++                      diff = pn ? -1 : 1;
++
++              if (diff < 0) {
++                      list_for_each(p, &pn->server->volumes) {
++                              s = list_entry(p, struct afs_server_entry, slink);
++                              if (volume->vid <= s->volume->vid)
++                                      break;
++                      }
++                      list_add_tail_rcu(&pn->slink, p);
++                      n++;
++              } else {
++                      list_del_rcu(&po->slink);
++                      o++;
++              }
++      }
++
++      spin_unlock(&volume->cell->vs_lock);
++}
++
++/*
++ * Detach a volume from the servers it has been using.
++ */
++void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server_list *slist)
++{
++      unsigned int i;
++
++      if (!slist->attached)
++              return;
++
++      spin_lock(&volume->cell->vs_lock);
++
++      for (i = 0; i < slist->nr_servers; i++)
++              list_del_rcu(&slist->servers[i].slink);
++
++      slist->attached = false;
++      spin_unlock(&volume->cell->vs_lock);
++}
+diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c
+index 83cf1bfbe343a..b2cc10df95308 100644
+--- a/fs/afs/vl_alias.c
++++ b/fs/afs/vl_alias.c
+@@ -126,7 +126,7 @@ static int afs_compare_volume_slists(const struct afs_volume *vol_a,
+       lb = rcu_dereference(vol_b->servers);
+ 
+       for (i = 0; i < AFS_MAXTYPES; i++)
+-              if (la->vids[i] != lb->vids[i])
++              if (vol_a->vids[i] != vol_b->vids[i])
+                       return 0;
+ 
+       while (a < la->nr_servers && b < lb->nr_servers) {
+diff --git a/fs/afs/volume.c b/fs/afs/volume.c
+index c028598a903c9..0f64b97581272 100644
+--- a/fs/afs/volume.c
++++ b/fs/afs/volume.c
+@@ -72,11 +72,11 @@ static void afs_remove_volume_from_cell(struct afs_volume *volume)
+  */
+ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
+                                          struct afs_vldb_entry *vldb,
+-                                         unsigned long type_mask)
++                                         struct afs_server_list **_slist)
+ {
+       struct afs_server_list *slist;
+       struct afs_volume *volume;
+-      int ret = -ENOMEM;
++      int ret = -ENOMEM, i;
+ 
+       volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
+       if (!volume)
+@@ -95,13 +95,16 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
+       rwlock_init(&volume->cb_v_break_lock);
+       memcpy(volume->name, vldb->name, vldb->name_len + 1);
+ 
+-      slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
++      for (i = 0; i < AFS_MAXTYPES; i++)
++              volume->vids[i] = vldb->vid[i];
++
++      slist = afs_alloc_server_list(volume, params->key, vldb);
+       if (IS_ERR(slist)) {
+               ret = PTR_ERR(slist);
+               goto error_1;
+       }
+ 
+-      refcount_set(&slist->usage, 1);
++      *_slist = slist;
+       rcu_assign_pointer(volume->servers, slist);
+       trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc);
+       return volume;
+@@ -117,17 +120,19 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
+  * Look up or allocate a volume record.
+  */
+ static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params,
+-                                          struct afs_vldb_entry *vldb,
+-                                          unsigned long type_mask)
++                                          struct afs_vldb_entry *vldb)
+ {
++      struct afs_server_list *slist;
+       struct afs_volume *candidate, *volume;
+ 
+-      candidate = afs_alloc_volume(params, vldb, type_mask);
++      candidate = afs_alloc_volume(params, vldb, &slist);
+       if (IS_ERR(candidate))
+               return candidate;
+ 
+       volume = afs_insert_volume_into_cell(params->cell, candidate);
+-      if (volume != candidate)
++      if (volume == candidate)
++              afs_attach_volume_to_servers(volume, slist);
++      else
+               afs_put_volume(params->net, candidate, afs_volume_trace_put_cell_dup);
+       return volume;
+ }
+@@ -208,8 +213,7 @@ struct afs_volume *afs_create_volume(struct afs_fs_context *params)
+               goto error;
+       }
+ 
+-      type_mask = 1UL << params->type;
+-      volume = afs_lookup_volume(params, vldb, type_mask);
++      volume = afs_lookup_volume(params, vldb);
+ 
+ error:
+       kfree(vldb);
+@@ -221,14 +225,17 @@ struct afs_volume *afs_create_volume(struct afs_fs_context *params)
+  */
+ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
+ {
++      struct afs_server_list *slist = rcu_access_pointer(volume->servers);
++
+       _enter("%p", volume);
+ 
+ #ifdef CONFIG_AFS_FSCACHE
+       ASSERTCMP(volume->cache, ==, NULL);
+ #endif
+ 
++      afs_detach_volume_from_servers(volume, slist);
+       afs_remove_volume_from_cell(volume);
+-      afs_put_serverlist(net, rcu_access_pointer(volume->servers));
++      afs_put_serverlist(net, slist);
+       afs_put_cell(volume->cell, afs_cell_trace_put_vol);
+       trace_afs_volume(volume->vid, refcount_read(&volume->ref),
+                        afs_volume_trace_free);
+@@ -362,8 +369,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
+       }
+ 
+       /* See if the volume's server list got updated. */
+-      new = afs_alloc_server_list(volume->cell, key,
+-                                  vldb, (1 << volume->type));
++      new = afs_alloc_server_list(volume, key, vldb);
+       if (IS_ERR(new)) {
+               ret = PTR_ERR(new);
+               goto error_vldb;
+@@ -384,9 +390,11 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
+ 
+       volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
+       write_unlock(&volume->servers_lock);
+-      ret = 0;
+ 
++      if (discard == old)
++              afs_reattach_volume_to_servers(volume, new, old);
+       afs_put_serverlist(volume->cell->net, discard);
++      ret = 0;
+ error_vldb:
+       kfree(vldb);
+ error:
+-- 
+2.39.5
+
diff --git a/queue-6.1/afs-remove-variable-nr_servers.patch b/queue-6.1/afs-remove-variable-nr_servers.patch

new file mode 100644 (file)

index 0000000..7a8b2eb
--- /dev/null
+++ b/queue-6.1/afs-remove-variable-nr_servers.patch
@@ -0,0 +1,44 @@
+From abd023b40e52ce60463652114238de75a324d04f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Oct 2022 18:39:23 +0100
+Subject: afs: remove variable nr_servers
+
+From: Colin Ian King <colin.i.king@gmail.com>
+
+[ Upstream commit 318b83b71242998814a570c3420c042ee6165fca ]
+
+Variable nr_servers is no longer being used, the last reference
+to it was removed in commit 45df8462730d ("afs: Fix server list handling")
+so clean up the code by removing it.
+
+Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: Marc Dionne <marc.dionne@auristor.com>
+cc: linux-afs@lists.infradead.org
+Link: https://lore.kernel.org/r/20221020173923.21342-1-colin.i.king@gmail.com/
+Stable-dep-of: add117e48df4 ("afs: Fix the server_list to unuse a displaced server rather than putting it")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/afs/volume.c | 6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+diff --git a/fs/afs/volume.c b/fs/afs/volume.c
+index a146d70efa650..c028598a903c9 100644
+--- a/fs/afs/volume.c
++++ b/fs/afs/volume.c
+@@ -76,11 +76,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
+ {
+       struct afs_server_list *slist;
+       struct afs_volume *volume;
+-      int ret = -ENOMEM, nr_servers = 0, i;
+-
+-      for (i = 0; i < vldb->nr_servers; i++)
+-              if (vldb->fs_mask[i] & type_mask)
+-                      nr_servers++;
++      int ret = -ENOMEM;
+ 
+       volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
+       if (!volume)
+-- 
+2.39.5
+
diff --git a/queue-6.1/alsa-usb-audio-avoid-dropping-midi-events-at-closing.patch b/queue-6.1/alsa-usb-audio-avoid-dropping-midi-events-at-closing.patch

new file mode 100644 (file)

index 0000000..949be5a
--- /dev/null
+++ b/queue-6.1/alsa-usb-audio-avoid-dropping-midi-events-at-closing.patch
@@ -0,0 +1,47 @@
+From 868e655e0ba14e36ff87267557a61b710e48a6dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Feb 2025 12:40:24 +0100
+Subject: ALSA: usb-audio: Avoid dropping MIDI events at closing multiple ports
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit a3bdd8f5c2217e1cb35db02c2eed36ea20fb50f5 ]
+
+We fixed the UAF issue in USB MIDI code by canceling the pending work
+at closing each MIDI output device in the commit below.  However, this
+assumed that it's the only one that is tied with the endpoint, and it
+resulted in unexpected data truncations when multiple devices are
+assigned to a single endpoint and opened simultaneously.
+
+For addressing the unexpected MIDI message drops, simply replace
+cancel_work_sync() with flush_work().  The drain callback should have
+been already invoked before the close callback, hence the port->active
+flag must be already cleared.  So this just assures that the pending
+work is finished before freeing the resources.
+
+Fixes: 0125de38122f ("ALSA: usb-audio: Cancel pending work at closing a MIDI substream")
+Reported-and-tested-by: John Keeping <jkeeping@inmusicbrands.com>
+Closes: https://lore.kernel.org/20250217111647.3368132-1-jkeeping@inmusicbrands.com
+Link: https://patch.msgid.link/20250218114024.23125-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/usb/midi.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sound/usb/midi.c b/sound/usb/midi.c
+index 2839f6b6f09b4..eed71369c7af2 100644
+--- a/sound/usb/midi.c
++++ b/sound/usb/midi.c
+@@ -1145,7 +1145,7 @@ static int snd_usbmidi_output_close(struct snd_rawmidi_substream *substream)
+ {
+       struct usbmidi_out_port *port = substream->runtime->private_data;
+ 
+-      cancel_work_sync(&port->ep->work);
++      flush_work(&port->ep->work);
+       return substream_open(substream, 0, 0);
+ }
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.1/asoc-es8328-fix-route-from-dac-to-output.patch b/queue-6.1/asoc-es8328-fix-route-from-dac-to-output.patch

new file mode 100644 (file)

index 0000000..fc71db6
--- /dev/null
+++ b/queue-6.1/asoc-es8328-fix-route-from-dac-to-output.patch
@@ -0,0 +1,110 @@
+From 7324798703ecd45fed10a1ed326cc27dd6fa8c62 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 22 Feb 2025 20:39:57 +0100
+Subject: ASoC: es8328: fix route from DAC to output
+
+From: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
+
+[ Upstream commit 5b0c02f9b8acf2a791e531bbc09acae2d51f4f9b ]
+
+The ES8328 codec driver, which is also used for the ES8388 chip that
+appears to have an identical register map, claims that the output can
+either take the route from DAC->Mixer->Output or through DAC->Output
+directly. To the best of what I could find, this is not true, and
+creates problems.
+
+Without DACCONTROL17 bit index 7 set for the left channel, as well as
+DACCONTROL20 bit index 7 set for the right channel, I cannot get any
+analog audio out on Left Out 2 and Right Out 2 respectively, despite the
+DAPM routes claiming that this should be possible. Furthermore, the same
+is the case for Left Out 1 and Right Out 1, showing that those two don't
+have a direct route from DAC to output bypassing the mixer either.
+
+Those control bits toggle whether the DACs are fed (stale bread?) into
+their respective mixers. If one "unmutes" the mixer controls in
+alsamixer, then sure, the audio output works, but if it doesn't work
+without the mixer being fed the DAC input then evidently it's not a
+direct output from the DAC.
+
+ES8328/ES8388 are seemingly not alone in this. ES8323, which uses a
+separate driver for what appears to be a very similar register map,
+simply flips those two bits on in its probe function, and then pretends
+there is no power management whatsoever for the individual controls.
+Fair enough.
+
+My theory as to why nobody has noticed this up to this point is that
+everyone just assumes it's their fault when they had to unmute an
+additional control in ALSA.
+
+Fix this in the es8328 driver by removing the erroneous direct route,
+then get rid of the playback switch controls and have those bits tied to
+the mixer's widget instead, which until now had no register to play
+with.
+
+Fixes: 567e4f98922c ("ASoC: add es8328 codec driver")
+Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
+Link: https://patch.msgid.link/20250222-es8328-route-bludgeoning-v1-1-99bfb7fb22d9@collabora.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/es8328.c | 15 ++++-----------
+ 1 file changed, 4 insertions(+), 11 deletions(-)
+
+diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c
+index 160adc706cc69..8182e9b37c03d 100644
+--- a/sound/soc/codecs/es8328.c
++++ b/sound/soc/codecs/es8328.c
+@@ -234,7 +234,6 @@ static const struct snd_kcontrol_new es8328_right_line_controls =
+ 
+ /* Left Mixer */
+ static const struct snd_kcontrol_new es8328_left_mixer_controls[] = {
+-      SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL17, 7, 1, 0),
+       SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL17, 6, 1, 0),
+       SOC_DAPM_SINGLE("Right Playback Switch", ES8328_DACCONTROL18, 7, 1, 0),
+       SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL18, 6, 1, 0),
+@@ -244,7 +243,6 @@ static const struct snd_kcontrol_new es8328_left_mixer_controls[] = {
+ static const struct snd_kcontrol_new es8328_right_mixer_controls[] = {
+       SOC_DAPM_SINGLE("Left Playback Switch", ES8328_DACCONTROL19, 7, 1, 0),
+       SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL19, 6, 1, 0),
+-      SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL20, 7, 1, 0),
+       SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL20, 6, 1, 0),
+ };
+ 
+@@ -337,10 +335,10 @@ static const struct snd_soc_dapm_widget es8328_dapm_widgets[] = {
+       SND_SOC_DAPM_DAC("Left DAC", "Left Playback", ES8328_DACPOWER,
+                       ES8328_DACPOWER_LDAC_OFF, 1),
+ 
+-      SND_SOC_DAPM_MIXER("Left Mixer", SND_SOC_NOPM, 0, 0,
++      SND_SOC_DAPM_MIXER("Left Mixer", ES8328_DACCONTROL17, 7, 0,
+               &es8328_left_mixer_controls[0],
+               ARRAY_SIZE(es8328_left_mixer_controls)),
+-      SND_SOC_DAPM_MIXER("Right Mixer", SND_SOC_NOPM, 0, 0,
++      SND_SOC_DAPM_MIXER("Right Mixer", ES8328_DACCONTROL20, 7, 0,
+               &es8328_right_mixer_controls[0],
+               ARRAY_SIZE(es8328_right_mixer_controls)),
+ 
+@@ -419,19 +417,14 @@ static const struct snd_soc_dapm_route es8328_dapm_routes[] = {
+       { "Right Line Mux", "PGA", "Right PGA Mux" },
+       { "Right Line Mux", "Differential", "Differential Mux" },
+ 
+-      { "Left Out 1", NULL, "Left DAC" },
+-      { "Right Out 1", NULL, "Right DAC" },
+-      { "Left Out 2", NULL, "Left DAC" },
+-      { "Right Out 2", NULL, "Right DAC" },
+-
+-      { "Left Mixer", "Playback Switch", "Left DAC" },
++      { "Left Mixer", NULL, "Left DAC" },
+       { "Left Mixer", "Left Bypass Switch", "Left Line Mux" },
+       { "Left Mixer", "Right Playback Switch", "Right DAC" },
+       { "Left Mixer", "Right Bypass Switch", "Right Line Mux" },
+ 
+       { "Right Mixer", "Left Playback Switch", "Left DAC" },
+       { "Right Mixer", "Left Bypass Switch", "Left Line Mux" },
+-      { "Right Mixer", "Playback Switch", "Right DAC" },
++      { "Right Mixer", NULL, "Right DAC" },
+       { "Right Mixer", "Right Bypass Switch", "Right Line Mux" },
+ 
+       { "DAC DIG", NULL, "DAC STM" },
+-- 
+2.39.5
+
diff --git a/queue-6.1/bluetooth-l2cap-fix-l2cap_ecred_conn_rsp-response.patch b/queue-6.1/bluetooth-l2cap-fix-l2cap_ecred_conn_rsp-response.patch

new file mode 100644 (file)

index 0000000..cfa8adc
--- /dev/null
+++ b/queue-6.1/bluetooth-l2cap-fix-l2cap_ecred_conn_rsp-response.patch
@@ -0,0 +1,99 @@
+From 719c2d3d6138e88d85b45cbb6a1000ba448e6078 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Feb 2025 10:30:25 -0500
+Subject: Bluetooth: L2CAP: Fix L2CAP_ECRED_CONN_RSP response
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit b25120e1d5f2ebb3db00af557709041f47f7f3d0 ]
+
+L2CAP_ECRED_CONN_RSP needs to respond DCID in the same order received as
+SCID but the order is reversed due to use of list_add which actually
+prepends channels to the list so the response is reversed:
+
+> ACL Data RX: Handle 16 flags 0x02 dlen 26
+      LE L2CAP: Enhanced Credit Connection Request (0x17) ident 2 len 18
+        PSM: 39 (0x0027)
+        MTU: 256
+        MPS: 251
+        Credits: 65535
+        Source CID: 116
+        Source CID: 117
+        Source CID: 118
+        Source CID: 119
+        Source CID: 120
+< ACL Data TX: Handle 16 flags 0x00 dlen 26
+      LE L2CAP: Enhanced Credit Connection Response (0x18) ident 2 len 18
+        MTU: 517
+        MPS: 247
+        Credits: 3
+        Result: Connection successful (0x0000)
+        Destination CID: 68
+        Destination CID: 67
+        Destination CID: 66
+        Destination CID: 65
+        Destination CID: 64
+
+Also make sure the response doesn't include channels that are not on
+BT_CONNECT2 since the chan->ident can be set to the same value as in the
+following trace:
+
+< ACL Data TX: Handle 16 flags 0x00 dlen 12
+      LE L2CAP: LE Flow Control Credit (0x16) ident 6 len 4
+        Source CID: 64
+        Credits: 1
+...
+> ACL Data RX: Handle 16 flags 0x02 dlen 18
+      LE L2CAP: Enhanced Credit Connection Request (0x17) ident 6 len 10
+        PSM: 39 (0x0027)
+        MTU: 517
+        MPS: 251
+        Credits: 255
+        Source CID: 70
+< ACL Data TX: Handle 16 flags 0x00 dlen 20
+      LE L2CAP: Enhanced Credit Connection Response (0x18) ident 6 len 12
+        MTU: 517
+        MPS: 247
+        Credits: 3
+        Result: Connection successful (0x0000)
+        Destination CID: 64
+        Destination CID: 68
+
+Closes: https://github.com/bluez/bluez/issues/1094
+Fixes: 9aa9d9473f15 ("Bluetooth: L2CAP: Fix responding with wrong PDU type")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/l2cap_core.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
+index 2a8051fae08c7..36d6122f2e12d 100644
+--- a/net/bluetooth/l2cap_core.c
++++ b/net/bluetooth/l2cap_core.c
+@@ -656,7 +656,8 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan)
+           test_bit(FLAG_HOLD_HCI_CONN, &chan->flags))
+               hci_conn_hold(conn->hcon);
+ 
+-      list_add(&chan->list, &conn->chan_l);
++      /* Append to the list since the order matters for ECRED */
++      list_add_tail(&chan->list, &conn->chan_l);
+ }
+ 
+ void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan)
+@@ -3995,7 +3996,11 @@ static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data)
+ {
+       struct l2cap_ecred_rsp_data *rsp = data;
+ 
+-      if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags))
++      /* Check if channel for outgoing connection or if it wasn't deferred
++       * since in those cases it must be skipped.
++       */
++      if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags) ||
++          !test_and_clear_bit(FLAG_DEFER_SETUP, &chan->flags))
+               return;
+ 
+       /* Reset ident so only one response is sent */
+-- 
+2.39.5
+
diff --git a/queue-6.1/include-net-add-static-inline-dst_dev_overhead-to-ds.patch b/queue-6.1/include-net-add-static-inline-dst_dev_overhead-to-ds.patch

new file mode 100644 (file)

index 0000000..14b739d
--- /dev/null
+++ b/queue-6.1/include-net-add-static-inline-dst_dev_overhead-to-ds.patch
@@ -0,0 +1,49 @@
+From 2a95bdb541af9b0db397f9fd9c6c45bd621b0c44 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Dec 2024 13:49:42 +0100
+Subject: include: net: add static inline dst_dev_overhead() to dst.h
+
+From: Justin Iurman <justin.iurman@uliege.be>
+
+[ Upstream commit 0600cf40e9b36fe17f9c9f04d4f9cef249eaa5e7 ]
+
+Add static inline dst_dev_overhead() function to include/net/dst.h. This
+helper function is used by ioam6_iptunnel, rpl_iptunnel and
+seg6_iptunnel to get the dev's overhead based on a cache entry
+(dst_entry). If the cache is empty, the default and generic value
+skb->mac_len is returned. Otherwise, LL_RESERVED_SPACE() over dst's dev
+is returned.
+
+Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
+Cc: Alexander Lobakin <aleksander.lobakin@intel.com>
+Cc: Vadim Fedorenko <vadim.fedorenko@linux.dev>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: c64a0727f9b1 ("net: ipv6: fix dst ref loop on input in seg6 lwt")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/dst.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/include/net/dst.h b/include/net/dst.h
+index d67fda89cd0fa..3a1a6f94a8092 100644
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -434,6 +434,15 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout)
+               dst->expires = expires;
+ }
+ 
++static inline unsigned int dst_dev_overhead(struct dst_entry *dst,
++                                          struct sk_buff *skb)
++{
++      if (likely(dst))
++              return LL_RESERVED_SPACE(dst->dev);
++
++      return skb->mac_len;
++}
++
+ INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *,
+                                        struct sk_buff *));
+ INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *,
+-- 
+2.39.5
+
diff --git a/queue-6.1/ipv4-convert-icmp_route_lookup-to-dscp_t.patch b/queue-6.1/ipv4-convert-icmp_route_lookup-to-dscp_t.patch

new file mode 100644 (file)

index 0000000..f86edfc
--- /dev/null
+++ b/queue-6.1/ipv4-convert-icmp_route_lookup-to-dscp_t.patch
@@ -0,0 +1,81 @@
+From 58dc84dfc7fe7af13d34987cc888d07ad85f3155 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2024 21:28:37 +0200
+Subject: ipv4: Convert icmp_route_lookup() to dscp_t.
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit 913c83a610bb7dd8e5952a2b4663e1feec0b5de6 ]
+
+Pass a dscp_t variable to icmp_route_lookup(), instead of a plain u8,
+to prevent accidental setting of ECN bits in ->flowi4_tos. Rename that
+variable ("tos" -> "dscp") to make the intent clear.
+
+While there, reorganise the function parameters to fill up horizontal
+space.
+
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://patch.msgid.link/294fead85c6035bcdc5fcf9a6bb4ce8798c45ba1.1727807926.git.gnault@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/icmp.c | 19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+index a154339845dd4..855fcef829e2c 100644
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -484,13 +484,11 @@ static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
+       return route_lookup_dev;
+ }
+ 
+-static struct rtable *icmp_route_lookup(struct net *net,
+-                                      struct flowi4 *fl4,
++static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
+                                       struct sk_buff *skb_in,
+-                                      const struct iphdr *iph,
+-                                      __be32 saddr, u8 tos, u32 mark,
+-                                      int type, int code,
+-                                      struct icmp_bxm *param)
++                                      const struct iphdr *iph, __be32 saddr,
++                                      dscp_t dscp, u32 mark, int type,
++                                      int code, struct icmp_bxm *param)
+ {
+       struct net_device *route_lookup_dev;
+       struct rtable *rt, *rt2;
+@@ -503,7 +501,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
+       fl4->saddr = saddr;
+       fl4->flowi4_mark = mark;
+       fl4->flowi4_uid = sock_net_uid(net, NULL);
+-      fl4->flowi4_tos = tos & INET_DSCP_MASK;
++      fl4->flowi4_tos = inet_dscp_to_dsfield(dscp);
+       fl4->flowi4_proto = IPPROTO_ICMP;
+       fl4->fl4_icmp_type = type;
+       fl4->fl4_icmp_code = code;
+@@ -551,7 +549,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
+               orefdst = skb_in->_skb_refdst; /* save old refdst */
+               skb_dst_set(skb_in, NULL);
+               err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
+-                                   tos, rt2->dst.dev);
++                                   inet_dscp_to_dsfield(dscp), rt2->dst.dev);
+ 
+               dst_release(&rt2->dst);
+               rt2 = skb_rtable(skb_in);
+@@ -747,8 +745,9 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+       ipc.opt = &icmp_param.replyopts.opt;
+       ipc.sockc.mark = mark;
+ 
+-      rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
+-                             type, code, &icmp_param);
++      rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr,
++                             inet_dsfield_to_dscp(tos), mark, type, code,
++                             &icmp_param);
+       if (IS_ERR(rt))
+               goto out_unlock;
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.1/ipv4-convert-ip_route_input-to-dscp_t.patch b/queue-6.1/ipv4-convert-ip_route_input-to-dscp_t.patch

new file mode 100644 (file)

index 0000000..f811a9c
--- /dev/null
+++ b/queue-6.1/ipv4-convert-ip_route_input-to-dscp_t.patch
@@ -0,0 +1,156 @@
+From 61f5ec7e5f8d03c48b3895ab18c882105110647d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2024 21:28:43 +0200
+Subject: ipv4: Convert ip_route_input() to dscp_t.
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit 7e863e5db6185b1add0df4cb01b31a4ed1c4b738 ]
+
+Pass a dscp_t variable to ip_route_input(), instead of a plain u8, to
+prevent accidental setting of ECN bits in ->flowi4_tos.
+
+Callers of ip_route_input() to consider are:
+
+  * input_action_end_dx4_finish() and input_action_end_dt4() in
+    net/ipv6/seg6_local.c. These functions set the tos parameter to 0,
+    which is already a valid dscp_t value, so they don't need to be
+    adjusted for the new prototype.
+
+  * icmp_route_lookup(), which already has a dscp_t variable to pass as
+    parameter. We just need to remove the inet_dscp_to_dsfield()
+    conversion.
+
+  * br_nf_pre_routing_finish(), ip_options_rcv_srr() and ip4ip6_err(),
+    which get the DSCP directly from IPv4 headers. Define a helper to
+    read the .tos field of struct iphdr as dscp_t, so that these
+    functions don't have to do the conversion manually.
+
+While there, declare *iph as const in br_nf_pre_routing_finish(),
+declare its local variables in reverse-christmas-tree order and move
+the "err = ip_route_input()" assignment out of the conditional to avoid
+checkpatch warning.
+
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://patch.msgid.link/e9d40781d64d3d69f4c79ac8a008b8d67a033e8d.1727807926.git.gnault@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip.h                | 5 +++++
+ include/net/route.h             | 5 +++--
+ net/bridge/br_netfilter_hooks.c | 8 +++++---
+ net/ipv4/icmp.c                 | 2 +-
+ net/ipv4/ip_options.c           | 3 ++-
+ net/ipv6/ip6_tunnel.c           | 4 ++--
+ 6 files changed, 18 insertions(+), 9 deletions(-)
+
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 9d754c4a53002..4ee23eb0814a3 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -409,6 +409,11 @@ int ip_decrease_ttl(struct iphdr *iph)
+       return --iph->ttl;
+ }
+ 
++static inline dscp_t ip4h_dscp(const struct iphdr *ip4h)
++{
++      return inet_dsfield_to_dscp(ip4h->tos);
++}
++
+ static inline int ip_mtu_locked(const struct dst_entry *dst)
+ {
+       const struct rtable *rt = (const struct rtable *)dst;
+diff --git a/include/net/route.h b/include/net/route.h
+index f396176022377..4185e6da9ef85 100644
+--- a/include/net/route.h
++++ b/include/net/route.h
+@@ -203,12 +203,13 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
+                     const struct sk_buff *hint);
+ 
+ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
+-                               u8 tos, struct net_device *devin)
++                               dscp_t dscp, struct net_device *devin)
+ {
+       int err;
+ 
+       rcu_read_lock();
+-      err = ip_route_input_noref(skb, dst, src, tos, devin);
++      err = ip_route_input_noref(skb, dst, src, inet_dscp_to_dsfield(dscp),
++                                 devin);
+       if (!err) {
+               skb_dst_force(skb);
+               if (!skb_dst(skb))
+diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
+index 5c6ed1d49b92c..b4d661fe7886d 100644
+--- a/net/bridge/br_netfilter_hooks.c
++++ b/net/bridge/br_netfilter_hooks.c
+@@ -366,9 +366,9 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
+  */
+ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+-      struct net_device *dev = skb->dev, *br_indev;
+-      struct iphdr *iph = ip_hdr(skb);
+       struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
++      struct net_device *dev = skb->dev, *br_indev;
++      const struct iphdr *iph = ip_hdr(skb);
+       struct rtable *rt;
+       int err;
+ 
+@@ -386,7 +386,9 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
+       }
+       nf_bridge->in_prerouting = 0;
+       if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
+-              if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
++              err = ip_route_input(skb, iph->daddr, iph->saddr,
++                                   ip4h_dscp(iph), dev);
++              if (err) {
+                       struct in_device *in_dev = __in_dev_get_rcu(dev);
+ 
+                       /* If err equals -EHOSTUNREACH the error is due to a
+diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+index 855fcef829e2c..94501bb30c431 100644
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -549,7 +549,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
+               orefdst = skb_in->_skb_refdst; /* save old refdst */
+               skb_dst_set(skb_in, NULL);
+               err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
+-                                   inet_dscp_to_dsfield(dscp), rt2->dst.dev);
++                                   dscp, rt2->dst.dev);
+ 
+               dst_release(&rt2->dst);
+               rt2 = skb_rtable(skb_in);
+diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
+index a9e22a098872f..b4c59708fc095 100644
+--- a/net/ipv4/ip_options.c
++++ b/net/ipv4/ip_options.c
+@@ -617,7 +617,8 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev)
+ 
+               orefdst = skb->_skb_refdst;
+               skb_dst_set(skb, NULL);
+-              err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev);
++              err = ip_route_input(skb, nexthop, iph->saddr, ip4h_dscp(iph),
++                                   dev);
+               rt2 = skb_rtable(skb);
+               if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
+                       skb_dst_drop(skb);
+diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
+index f3324f2a40466..a82d382193e41 100644
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -628,8 +628,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+               }
+               skb_dst_set(skb2, &rt->dst);
+       } else {
+-              if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
+-                                 skb2->dev) ||
++              if (ip_route_input(skb2, eiph->daddr, eiph->saddr,
++                                 ip4h_dscp(eiph), skb2->dev) ||
+                   skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
+                       goto out;
+       }
+-- 
+2.39.5
+
diff --git a/queue-6.1/ipv4-icmp-pass-full-ds-field-to-ip_route_input.patch b/queue-6.1/ipv4-icmp-pass-full-ds-field-to-ip_route_input.patch

new file mode 100644 (file)

index 0000000..9d8ea8e
--- /dev/null
+++ b/queue-6.1/ipv4-icmp-pass-full-ds-field-to-ip_route_input.patch
@@ -0,0 +1,44 @@
+From 94c1799e9dda9365958ade9cf0bac70aaa0d1eb3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Aug 2024 15:52:49 +0300
+Subject: ipv4: icmp: Pass full DS field to ip_route_input()
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 1c6f50b37f711b831d78973dad0df1da99ad0014 ]
+
+Align the ICMP code to other callers of ip_route_input() and pass the
+full DS field. In the future this will allow us to perform a route
+lookup according to the full DSCP value.
+
+No functional changes intended since the upper DSCP bits are masked when
+comparing against the TOS selectors in FIB rules and routes.
+
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: Florian Westphal <fw@strlen.de>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://patch.msgid.link/20240821125251.1571445-11-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/icmp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+index b05fa424ad5ce..3807a269e0755 100644
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -550,7 +550,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
+               orefdst = skb_in->_skb_refdst; /* save old refdst */
+               skb_dst_set(skb_in, NULL);
+               err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
+-                                   RT_TOS(tos), rt2->dst.dev);
++                                   tos, rt2->dst.dev);
+ 
+               dst_release(&rt2->dst);
+               rt2 = skb_rtable(skb_in);
+-- 
+2.39.5
+
diff --git a/queue-6.1/ipv4-icmp-unmask-upper-dscp-bits-in-icmp_route_looku.patch b/queue-6.1/ipv4-icmp-unmask-upper-dscp-bits-in-icmp_route_looku.patch

new file mode 100644 (file)

index 0000000..3122aab
--- /dev/null
+++ b/queue-6.1/ipv4-icmp-unmask-upper-dscp-bits-in-icmp_route_looku.patch
@@ -0,0 +1,51 @@
+From 82a825362282a873d74575a1437d17cc84c26a6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Aug 2024 09:54:50 +0300
+Subject: ipv4: icmp: Unmask upper DSCP bits in icmp_route_lookup()
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 4805646c42e51d2fbf142864d281473ad453ad5d ]
+
+The function is called to resolve a route for an ICMP message that is
+sent in response to a situation. Based on the type of the generated ICMP
+message, the function is either passed the DS field of the packet that
+generated the ICMP message or a DS field that is derived from it.
+
+Unmask the upper DSCP bits before resolving an output route via
+ip_route_output_key_hash() so that in the future the lookup could be
+performed according to the full DSCP value.
+
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/icmp.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+index 3807a269e0755..a154339845dd4 100644
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -93,6 +93,7 @@
+ #include <net/ip_fib.h>
+ #include <net/l3mdev.h>
+ #include <net/addrconf.h>
++#include <net/inet_dscp.h>
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/icmp.h>
+ 
+@@ -502,7 +503,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
+       fl4->saddr = saddr;
+       fl4->flowi4_mark = mark;
+       fl4->flowi4_uid = sock_net_uid(net, NULL);
+-      fl4->flowi4_tos = RT_TOS(tos);
++      fl4->flowi4_tos = tos & INET_DSCP_MASK;
+       fl4->flowi4_proto = IPPROTO_ICMP;
+       fl4->fl4_icmp_type = type;
+       fl4->fl4_icmp_code = code;
+-- 
+2.39.5
+
diff --git a/queue-6.1/ipvlan-ensure-network-headers-are-in-skb-linear-part.patch b/queue-6.1/ipvlan-ensure-network-headers-are-in-skb-linear-part.patch

new file mode 100644 (file)

index 0000000..2cc9ea0
--- /dev/null
+++ b/queue-6.1/ipvlan-ensure-network-headers-are-in-skb-linear-part.patch
@@ -0,0 +1,113 @@
+From f7e8fb6cc77cee5f5b699ac13a3bc405f41d41bd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Feb 2025 15:53:36 +0000
+Subject: ipvlan: ensure network headers are in skb linear part
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 27843ce6ba3d3122b65066550fe33fb8839f8aef ]
+
+syzbot found that ipvlan_process_v6_outbound() was assuming
+the IPv6 network header is present in skb->head [1]
+
+Add the needed pskb_network_may_pull() calls for both
+IPv4 and IPv6 handlers.
+
+[1]
+BUG: KMSAN: uninit-value in __ipv6_addr_type+0xa2/0x490 net/ipv6/addrconf_core.c:47
+  __ipv6_addr_type+0xa2/0x490 net/ipv6/addrconf_core.c:47
+  ipv6_addr_type include/net/ipv6.h:555 [inline]
+  ip6_route_output_flags_noref net/ipv6/route.c:2616 [inline]
+  ip6_route_output_flags+0x51/0x720 net/ipv6/route.c:2651
+  ip6_route_output include/net/ip6_route.h:93 [inline]
+  ipvlan_route_v6_outbound+0x24e/0x520 drivers/net/ipvlan/ipvlan_core.c:476
+  ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:491 [inline]
+  ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:541 [inline]
+  ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:605 [inline]
+  ipvlan_queue_xmit+0xd72/0x1780 drivers/net/ipvlan/ipvlan_core.c:671
+  ipvlan_start_xmit+0x5b/0x210 drivers/net/ipvlan/ipvlan_main.c:223
+  __netdev_start_xmit include/linux/netdevice.h:5150 [inline]
+  netdev_start_xmit include/linux/netdevice.h:5159 [inline]
+  xmit_one net/core/dev.c:3735 [inline]
+  dev_hard_start_xmit+0x247/0xa20 net/core/dev.c:3751
+  sch_direct_xmit+0x399/0xd40 net/sched/sch_generic.c:343
+  qdisc_restart net/sched/sch_generic.c:408 [inline]
+  __qdisc_run+0x14da/0x35d0 net/sched/sch_generic.c:416
+  qdisc_run+0x141/0x4d0 include/net/pkt_sched.h:127
+  net_tx_action+0x78b/0x940 net/core/dev.c:5484
+  handle_softirqs+0x1a0/0x7c0 kernel/softirq.c:561
+  __do_softirq+0x14/0x1a kernel/softirq.c:595
+  do_softirq+0x9a/0x100 kernel/softirq.c:462
+  __local_bh_enable_ip+0x9f/0xb0 kernel/softirq.c:389
+  local_bh_enable include/linux/bottom_half.h:33 [inline]
+  rcu_read_unlock_bh include/linux/rcupdate.h:919 [inline]
+  __dev_queue_xmit+0x2758/0x57d0 net/core/dev.c:4611
+  dev_queue_xmit include/linux/netdevice.h:3311 [inline]
+  packet_xmit+0x9c/0x6c0 net/packet/af_packet.c:276
+  packet_snd net/packet/af_packet.c:3132 [inline]
+  packet_sendmsg+0x93e0/0xa7e0 net/packet/af_packet.c:3164
+  sock_sendmsg_nosec net/socket.c:718 [inline]
+
+Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.")
+Reported-by: syzbot+93ab4a777bafb9d9f960@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/67b74f01.050a0220.14d86d.02d8.GAE@google.com/T/#u
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Mahesh Bandewar <maheshb@google.com>
+Link: https://patch.msgid.link/20250220155336.61884-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ipvlan/ipvlan_core.c | 21 ++++++++++++++++-----
+ 1 file changed, 16 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
+index 38eb40cba5aac..eea81a7334052 100644
+--- a/drivers/net/ipvlan/ipvlan_core.c
++++ b/drivers/net/ipvlan/ipvlan_core.c
+@@ -416,20 +416,25 @@ struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h,
+ 
+ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
+ {
+-      const struct iphdr *ip4h = ip_hdr(skb);
+       struct net_device *dev = skb->dev;
+       struct net *net = dev_net(dev);
+-      struct rtable *rt;
+       int err, ret = NET_XMIT_DROP;
++      const struct iphdr *ip4h;
++      struct rtable *rt;
+       struct flowi4 fl4 = {
+               .flowi4_oif = dev->ifindex,
+-              .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)),
+               .flowi4_flags = FLOWI_FLAG_ANYSRC,
+               .flowi4_mark = skb->mark,
+-              .daddr = ip4h->daddr,
+-              .saddr = ip4h->saddr,
+       };
+ 
++      if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
++              goto err;
++
++      ip4h = ip_hdr(skb);
++      fl4.daddr = ip4h->daddr;
++      fl4.saddr = ip4h->saddr;
++      fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h));
++
+       rt = ip_route_output_flow(net, &fl4, NULL);
+       if (IS_ERR(rt))
+               goto err;
+@@ -488,6 +493,12 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+       struct net_device *dev = skb->dev;
+       int err, ret = NET_XMIT_DROP;
+ 
++      if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) {
++              DEV_STATS_INC(dev, tx_errors);
++              kfree_skb(skb);
++              return ret;
++      }
++
+       err = ipvlan_route_v6_outbound(dev, skb);
+       if (unlikely(err)) {
+               DEV_STATS_INC(dev, tx_errors);
+-- 
+2.39.5
+
diff --git a/queue-6.1/ipvlan-prepare-ipvlan_process_v4_outbound-to-future-.patch b/queue-6.1/ipvlan-prepare-ipvlan_process_v4_outbound-to-future-.patch

new file mode 100644 (file)

index 0000000..0967b7d
--- /dev/null
+++ b/queue-6.1/ipvlan-prepare-ipvlan_process_v4_outbound-to-future-.patch
@@ -0,0 +1,50 @@
+From 094e8682d0f179f4ec92515ca59d4bc7494fc73d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Oct 2024 13:43:11 +0100
+Subject: ipvlan: Prepare ipvlan_process_v4_outbound() to future .flowi4_tos
+ conversion.
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit 0c30d6eedd1ec0c1382bcab9576d26413cd278a3 ]
+
+Use ip4h_dscp() to get the DSCP from the IPv4 header, then convert the
+dscp_t value to __u8 with inet_dscp_to_dsfield().
+
+Then, when we'll convert .flowi4_tos to dscp_t, we'll just have to drop
+the inet_dscp_to_dsfield() call.
+
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Link: https://patch.msgid.link/f48335504a05b3587e0081a9b4511e0761571ca5.1730292157.git.gnault@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ipvlan/ipvlan_core.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
+index d22a705ac4d6f..38eb40cba5aac 100644
+--- a/drivers/net/ipvlan/ipvlan_core.c
++++ b/drivers/net/ipvlan/ipvlan_core.c
+@@ -3,6 +3,7 @@
+  */
+ 
+ #include <net/inet_dscp.h>
++#include <net/ip.h>
+ 
+ #include "ipvlan.h"
+ 
+@@ -422,7 +423,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
+       int err, ret = NET_XMIT_DROP;
+       struct flowi4 fl4 = {
+               .flowi4_oif = dev->ifindex,
+-              .flowi4_tos = ip4h->tos & INET_DSCP_MASK,
++              .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)),
+               .flowi4_flags = FLOWI_FLAG_ANYSRC,
+               .flowi4_mark = skb->mark,
+               .daddr = ip4h->daddr,
+-- 
+2.39.5
+
diff --git a/queue-6.1/ipvlan-unmask-upper-dscp-bits-in-ipvlan_process_v4_o.patch b/queue-6.1/ipvlan-unmask-upper-dscp-bits-in-ipvlan_process_v4_o.patch

new file mode 100644 (file)

index 0000000..dfa2b5e
--- /dev/null
+++ b/queue-6.1/ipvlan-unmask-upper-dscp-bits-in-ipvlan_process_v4_o.patch
@@ -0,0 +1,47 @@
+From a4bb177add982cea591ed6292be1bbf19ef27c4b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Aug 2024 09:54:57 +0300
+Subject: ipvlan: Unmask upper DSCP bits in ipvlan_process_v4_outbound()
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 939cd1abf080c629552a9c5e6db4c0509d13e4c7 ]
+
+Unmask the upper DSCP bits when calling ip_route_output_flow() so that
+in the future it could perform the FIB lookup according to the full DSCP
+value.
+
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ipvlan/ipvlan_core.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
+index 1d49771d07f4c..d22a705ac4d6f 100644
+--- a/drivers/net/ipvlan/ipvlan_core.c
++++ b/drivers/net/ipvlan/ipvlan_core.c
+@@ -2,6 +2,8 @@
+ /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
+  */
+ 
++#include <net/inet_dscp.h>
++
+ #include "ipvlan.h"
+ 
+ static u32 ipvlan_jhash_secret __read_mostly;
+@@ -420,7 +422,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
+       int err, ret = NET_XMIT_DROP;
+       struct flowi4 fl4 = {
+               .flowi4_oif = dev->ifindex,
+-              .flowi4_tos = RT_TOS(ip4h->tos),
++              .flowi4_tos = ip4h->tos & INET_DSCP_MASK,
+               .flowi4_flags = FLOWI_FLAG_ANYSRC,
+               .flowi4_mark = skb->mark,
+               .daddr = ip4h->daddr,
+-- 
+2.39.5
+
diff --git a/queue-6.1/ipvs-always-clear-ipvs_property-flag-in-skb_scrub_pa.patch b/queue-6.1/ipvs-always-clear-ipvs_property-flag-in-skb_scrub_pa.patch

new file mode 100644 (file)

index 0000000..d1c8283
--- /dev/null
+++ b/queue-6.1/ipvs-always-clear-ipvs_property-flag-in-skb_scrub_pa.patch
@@ -0,0 +1,51 @@
+From 01fa3b1054a6e520434a3ca14645c1fd41cb79e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 22 Feb 2025 11:35:18 +0800
+Subject: ipvs: Always clear ipvs_property flag in skb_scrub_packet()
+
+From: Philo Lu <lulie@linux.alibaba.com>
+
+[ Upstream commit de2c211868b9424f9aa9b3432c4430825bafb41b ]
+
+We found an issue when using bpf_redirect with ipvs NAT mode after
+commit ff70202b2d1a ("dev_forward_skb: do not scrub skb mark within
+the same name space"). Particularly, we use bpf_redirect to return
+the skb directly back to the netif it comes from, i.e., xnet is
+false in skb_scrub_packet(), and then ipvs_property is preserved
+and SNAT is skipped in the rx path.
+
+ipvs_property has been already cleared when netns is changed in
+commit 2b5ec1a5f973 ("netfilter/ipvs: clear ipvs_property flag when
+SKB net namespace changed"). This patch clears it regardless of
+netns.
+
+Fixes: 2b5ec1a5f973 ("netfilter/ipvs: clear ipvs_property flag when SKB net namespace changed")
+Signed-off-by: Philo Lu <lulie@linux.alibaba.com>
+Acked-by: Julian Anastasov <ja@ssi.bg>
+Link: https://patch.msgid.link/20250222033518.126087-1-lulie@linux.alibaba.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/skbuff.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index 768b8d65a5baa..d8a3ada886ffb 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -5556,11 +5556,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
+       skb->offload_fwd_mark = 0;
+       skb->offload_l3_fwd_mark = 0;
+ #endif
++      ipvs_reset(skb);
+ 
+       if (!xnet)
+               return;
+ 
+-      ipvs_reset(skb);
+       skb->mark = 0;
+       skb_clear_tstamp(skb);
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-cadence-macb-synchronize-stats-calculations.patch b/queue-6.1/net-cadence-macb-synchronize-stats-calculations.patch

new file mode 100644 (file)

index 0000000..f129e81
--- /dev/null
+++ b/queue-6.1/net-cadence-macb-synchronize-stats-calculations.patch
@@ -0,0 +1,115 @@
+From efed222a32805570471b63c78320419d1f7a9c8b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Feb 2025 11:29:50 -0500
+Subject: net: cadence: macb: Synchronize stats calculations
+
+From: Sean Anderson <sean.anderson@linux.dev>
+
+[ Upstream commit fa52f15c745ce55261b92873676f64f7348cfe82 ]
+
+Stats calculations involve a RMW to add the stat update to the existing
+value. This is currently not protected by any synchronization mechanism,
+so data races are possible. Add a spinlock to protect the update. The
+reader side could be protected using u64_stats, but we would still need
+a spinlock for the update side anyway. And we always do an update
+immediately before reading the stats anyway.
+
+Fixes: 89e5785fc8a6 ("[PATCH] Atmel MACB ethernet driver")
+Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
+Link: https://patch.msgid.link/20250220162950.95941-1-sean.anderson@linux.dev
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/cadence/macb.h      |  2 ++
+ drivers/net/ethernet/cadence/macb_main.c | 12 ++++++++++--
+ 2 files changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
+index 1aa578c1ca4ad..8d66de71ea604 100644
+--- a/drivers/net/ethernet/cadence/macb.h
++++ b/drivers/net/ethernet/cadence/macb.h
+@@ -1271,6 +1271,8 @@ struct macb {
+       struct clk              *rx_clk;
+       struct clk              *tsu_clk;
+       struct net_device       *dev;
++      /* Protects hw_stats and ethtool_stats */
++      spinlock_t              stats_lock;
+       union {
+               struct macb_stats       macb;
+               struct gem_stats        gem;
+diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
+index d44d53d697620..fc3342944dbcc 100644
+--- a/drivers/net/ethernet/cadence/macb_main.c
++++ b/drivers/net/ethernet/cadence/macb_main.c
+@@ -1936,10 +1936,12 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
+ 
+               if (status & MACB_BIT(ISR_ROVR)) {
+                       /* We missed at least one packet */
++                      spin_lock(&bp->stats_lock);
+                       if (macb_is_gem(bp))
+                               bp->hw_stats.gem.rx_overruns++;
+                       else
+                               bp->hw_stats.macb.rx_overruns++;
++                      spin_unlock(&bp->stats_lock);
+ 
+                       if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
+                               queue_writel(queue, ISR, MACB_BIT(ISR_ROVR));
+@@ -2999,6 +3001,7 @@ static struct net_device_stats *gem_get_stats(struct macb *bp)
+       if (!netif_running(bp->dev))
+               return nstat;
+ 
++      spin_lock_irq(&bp->stats_lock);
+       gem_update_stats(bp);
+ 
+       nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors +
+@@ -3028,6 +3031,7 @@ static struct net_device_stats *gem_get_stats(struct macb *bp)
+       nstat->tx_aborted_errors = hwstat->tx_excessive_collisions;
+       nstat->tx_carrier_errors = hwstat->tx_carrier_sense_errors;
+       nstat->tx_fifo_errors = hwstat->tx_underrun;
++      spin_unlock_irq(&bp->stats_lock);
+ 
+       return nstat;
+ }
+@@ -3035,12 +3039,13 @@ static struct net_device_stats *gem_get_stats(struct macb *bp)
+ static void gem_get_ethtool_stats(struct net_device *dev,
+                                 struct ethtool_stats *stats, u64 *data)
+ {
+-      struct macb *bp;
++      struct macb *bp = netdev_priv(dev);
+ 
+-      bp = netdev_priv(dev);
++      spin_lock_irq(&bp->stats_lock);
+       gem_update_stats(bp);
+       memcpy(data, &bp->ethtool_stats, sizeof(u64)
+                       * (GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES));
++      spin_unlock_irq(&bp->stats_lock);
+ }
+ 
+ static int gem_get_sset_count(struct net_device *dev, int sset)
+@@ -3090,6 +3095,7 @@ static struct net_device_stats *macb_get_stats(struct net_device *dev)
+               return gem_get_stats(bp);
+ 
+       /* read stats from hardware */
++      spin_lock_irq(&bp->stats_lock);
+       macb_update_stats(bp);
+ 
+       /* Convert HW stats into netdevice stats */
+@@ -3123,6 +3129,7 @@ static struct net_device_stats *macb_get_stats(struct net_device *dev)
+       nstat->tx_carrier_errors = hwstat->tx_carrier_errors;
+       nstat->tx_fifo_errors = hwstat->tx_underruns;
+       /* Don't know about heartbeat or window errors... */
++      spin_unlock_irq(&bp->stats_lock);
+ 
+       return nstat;
+ }
+@@ -4949,6 +4956,7 @@ static int macb_probe(struct platform_device *pdev)
+       bp->usrio = macb_config->usrio;
+ 
+       spin_lock_init(&bp->lock);
++      spin_lock_init(&bp->stats_lock);
+ 
+       /* setup capabilities */
+       macb_configure_caps(bp, macb_config);
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-clear-old-fragment-checksum-value-in-napi_reuse_.patch b/queue-6.1/net-clear-old-fragment-checksum-value-in-napi_reuse_.patch

new file mode 100644 (file)

index 0000000..b66cb45
--- /dev/null
+++ b/queue-6.1/net-clear-old-fragment-checksum-value-in-napi_reuse_.patch
@@ -0,0 +1,77 @@
+From a8a59c84507057bcdeecb41267c8b25b8a3f0351 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Feb 2025 13:28:52 +0200
+Subject: net: Clear old fragment checksum value in napi_reuse_skb
+
+From: Mohammad Heib <mheib@redhat.com>
+
+[ Upstream commit 49806fe6e61b045b5be8610e08b5a3083c109aa0 ]
+
+In certain cases, napi_get_frags() returns an skb that points to an old
+received fragment. This skb may have its skb->ip_summed, csum, and other
+fields set from previous fragment handling.
+
+Some network drivers set skb->ip_summed to either CHECKSUM_COMPLETE or
+CHECKSUM_UNNECESSARY when getting skb from napi_get_frags(), while
+others only set skb->ip_summed when RX checksum offload is enabled on
+the device, and do not set any value for skb->ip_summed when hardware
+checksum offload is disabled, assuming that skb->ip_summed was
+initialized to zero by napi_reuse_skb(). The ionic driver, for example,
+does not set any value for the ip_summed field if HW checksum offload is
+disabled. If the user disables checksum offload while traffic is
+flowing, this can lead to the following errors shown in the kernel
+logs:
+<IRQ>
+dump_stack_lvl+0x34/0x48
+ __skb_gro_checksum_complete+0x7e/0x90
+tcp6_gro_receive+0xc6/0x190
+ipv6_gro_receive+0x1ec/0x430
+dev_gro_receive+0x188/0x360
+? ionic_rx_clean+0x25a/0x460 [ionic]
+napi_gro_frags+0x13c/0x300
+? __pfx_ionic_rx_service+0x10/0x10 [ionic]
+ionic_rx_service+0x67/0x80 [ionic]
+ionic_cq_service+0x58/0x90 [ionic]
+ionic_txrx_napi+0x64/0x1b0 [ionic]
+ __napi_poll+0x27/0x170
+net_rx_action+0x29c/0x370
+handle_softirqs+0xce/0x270
+__irq_exit_rcu+0xa3/0xc0
+common_interrupt+0x80/0xa0
+</IRQ>
+
+This inconsistency sometimes leads to checksum validation issues in the
+upper layers of the network stack.
+
+To resolve this, this patch clears the skb->ip_summed value for each
+reused skb in napi_reuse_skb(), ensuring that the caller is responsible
+for setting the correct checksum status. This eliminates potential
+checksum validation issues caused by improper handling of
+skb->ip_summed.
+
+Fixes: 76620aafd66f ("gro: New frags interface to avoid copying shinfo")
+Signed-off-by: Mohammad Heib <mheib@redhat.com>
+Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20250225112852.2507709-1-mheib@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/gro.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/core/gro.c b/net/core/gro.c
+index 47118e97ecfdd..c4cbf398c5f78 100644
+--- a/net/core/gro.c
++++ b/net/core/gro.c
+@@ -679,6 +679,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+       skb->pkt_type = PACKET_HOST;
+ 
+       skb->encapsulation = 0;
++      skb->ip_summed = CHECKSUM_NONE;
+       skb_shinfo(skb)->gso_type = 0;
+       skb_shinfo(skb)->gso_size = 0;
+       if (unlikely(skb->slow_gro)) {
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-ipv4-add-tracepoint-for-icmp_send.patch b/queue-6.1/net-ipv4-add-tracepoint-for-icmp_send.patch

new file mode 100644 (file)

index 0000000..746b5ab
--- /dev/null
+++ b/queue-6.1/net-ipv4-add-tracepoint-for-icmp_send.patch
@@ -0,0 +1,153 @@
+From d4efcf4743e3568a179f36d9fc4c98b970b38ef3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 May 2024 15:41:03 +0800
+Subject: net/ipv4: add tracepoint for icmp_send
+
+From: Peilin He <he.peilin@zte.com.cn>
+
+[ Upstream commit db3efdcf70c752e8a8deb16071d8e693c3ef8746 ]
+
+Introduce a tracepoint for icmp_send, which can help users to get more
+detailed information conveniently when icmp abnormal events happen.
+
+1. Giving a use-case example:
+=============================
+When an application experiences packet loss due to an unreachable UDP
+destination port, the kernel will send an exception message through the
+icmp_send function. By adding a trace point for icmp_send, developers or
+system administrators can obtain detailed information about the UDP
+packet loss, including the type, code, source address, destination address,
+source port, and destination port. This facilitates the trouble-shooting
+of UDP packet loss issues especially for those network-service
+applications.
+
+2. Operation Instructions:
+==========================
+Switch to the tracing directory.
+        cd /sys/kernel/tracing
+Filter for destination port unreachable.
+        echo "type==3 && code==3" > events/icmp/icmp_send/filter
+Enable trace event.
+        echo 1 > events/icmp/icmp_send/enable
+
+3. Result View:
+================
+ udp_client_erro-11370   [002] ...s.12   124.728002:
+ icmp_send: icmp_send: type=3, code=3.
+ From 127.0.0.1:41895 to 127.0.0.1:6666 ulen=23
+ skbaddr=00000000589b167a
+
+Signed-off-by: Peilin He <he.peilin@zte.com.cn>
+Signed-off-by: xu xin <xu.xin16@zte.com.cn>
+Reviewed-by: Yunkai Zhang <zhang.yunkai@zte.com.cn>
+Cc: Yang Yang <yang.yang29@zte.com.cn>
+Cc: Liu Chun <liu.chun2@zte.com.cn>
+Cc: Xuexin Jiang <jiang.xuexin@zte.com.cn>
+Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/icmp.h | 67 +++++++++++++++++++++++++++++++++++++
+ net/ipv4/icmp.c             |  4 +++
+ 2 files changed, 71 insertions(+)
+ create mode 100644 include/trace/events/icmp.h
+
+diff --git a/include/trace/events/icmp.h b/include/trace/events/icmp.h
+new file mode 100644
+index 0000000000000..31559796949a7
+--- /dev/null
++++ b/include/trace/events/icmp.h
+@@ -0,0 +1,67 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#undef TRACE_SYSTEM
++#define TRACE_SYSTEM icmp
++
++#if !defined(_TRACE_ICMP_H) || defined(TRACE_HEADER_MULTI_READ)
++#define _TRACE_ICMP_H
++
++#include <linux/icmp.h>
++#include <linux/tracepoint.h>
++
++TRACE_EVENT(icmp_send,
++
++              TP_PROTO(const struct sk_buff *skb, int type, int code),
++
++              TP_ARGS(skb, type, code),
++
++              TP_STRUCT__entry(
++                      __field(const void *, skbaddr)
++                      __field(int, type)
++                      __field(int, code)
++                      __array(__u8, saddr, 4)
++                      __array(__u8, daddr, 4)
++                      __field(__u16, sport)
++                      __field(__u16, dport)
++                      __field(unsigned short, ulen)
++              ),
++
++              TP_fast_assign(
++                      struct iphdr *iph = ip_hdr(skb);
++                      struct udphdr *uh = udp_hdr(skb);
++                      int proto_4 = iph->protocol;
++                      __be32 *p32;
++
++                      __entry->skbaddr = skb;
++                      __entry->type = type;
++                      __entry->code = code;
++
++                      if (proto_4 != IPPROTO_UDP || (u8 *)uh < skb->head ||
++                              (u8 *)uh + sizeof(struct udphdr)
++                              > skb_tail_pointer(skb)) {
++                              __entry->sport = 0;
++                              __entry->dport = 0;
++                              __entry->ulen = 0;
++                      } else {
++                              __entry->sport = ntohs(uh->source);
++                              __entry->dport = ntohs(uh->dest);
++                              __entry->ulen = ntohs(uh->len);
++                      }
++
++                      p32 = (__be32 *) __entry->saddr;
++                      *p32 = iph->saddr;
++
++                      p32 = (__be32 *) __entry->daddr;
++                      *p32 = iph->daddr;
++              ),
++
++              TP_printk("icmp_send: type=%d, code=%d. From %pI4:%u to %pI4:%u ulen=%d skbaddr=%p",
++                      __entry->type, __entry->code,
++                      __entry->saddr, __entry->sport, __entry->daddr,
++                      __entry->dport, __entry->ulen, __entry->skbaddr)
++);
++
++#endif /* _TRACE_ICMP_H */
++
++/* This part must be outside protection */
++#include <trace/define_trace.h>
++
+diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+index a21d32b3ae6c3..b05fa424ad5ce 100644
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -93,6 +93,8 @@
+ #include <net/ip_fib.h>
+ #include <net/l3mdev.h>
+ #include <net/addrconf.h>
++#define CREATE_TRACE_POINTS
++#include <trace/events/icmp.h>
+ 
+ /*
+  *    Build xmit assembly blocks
+@@ -778,6 +780,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+       if (!fl4.saddr)
+               fl4.saddr = htonl(INADDR_DUMMY);
+ 
++      trace_icmp_send(skb_in, type, code);
++
+       icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt);
+ ende:
+       ip_rt_put(rt);
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-rpl-lwt.patch b/queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-rpl-lwt.patch

new file mode 100644 (file)

index 0000000..9c297a9
--- /dev/null
+++ b/queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-rpl-lwt.patch
@@ -0,0 +1,60 @@
+From 0dcd93ff196729cdfcad0a1a07b22d2acf2066b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Feb 2025 18:51:39 +0100
+Subject: net: ipv6: fix dst ref loop on input in rpl lwt
+
+From: Justin Iurman <justin.iurman@uliege.be>
+
+[ Upstream commit 13e55fbaec176119cff68a7e1693b251c8883c5f ]
+
+Prevent a dst ref loop on input in rpl_iptunnel.
+
+Fixes: a7a29f9c361f ("net: ipv6: add rpl sr tunnel")
+Cc: Alexander Aring <alex.aring@gmail.com>
+Cc: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/rpl_iptunnel.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
+index 69b9bd90140dd..862ac1e2e191c 100644
+--- a/net/ipv6/rpl_iptunnel.c
++++ b/net/ipv6/rpl_iptunnel.c
+@@ -259,10 +259,18 @@ static int rpl_input(struct sk_buff *skb)
+ {
+       struct dst_entry *orig_dst = skb_dst(skb);
+       struct dst_entry *dst = NULL;
++      struct lwtunnel_state *lwtst;
+       struct rpl_lwt *rlwt;
+       int err;
+ 
+-      rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
++      /* We cannot dereference "orig_dst" once ip6_route_input() or
++       * skb_dst_drop() is called. However, in order to detect a dst loop, we
++       * need the address of its lwtstate. So, save the address of lwtstate
++       * now and use it later as a comparison.
++       */
++      lwtst = orig_dst->lwtstate;
++
++      rlwt = rpl_lwt_lwtunnel(lwtst);
+ 
+       local_bh_disable();
+       dst = dst_cache_get(&rlwt->cache);
+@@ -277,7 +285,9 @@ static int rpl_input(struct sk_buff *skb)
+       if (!dst) {
+               ip6_route_input(skb);
+               dst = skb_dst(skb);
+-              if (!dst->error) {
++
++              /* cache only if we don't create a dst reference loop */
++              if (!dst->error && lwtst != dst->lwtstate) {
+                       local_bh_disable();
+                       dst_cache_set_ip6(&rlwt->cache, dst,
+                                         &ipv6_hdr(skb)->saddr);
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-seg6-lwt.patch b/queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-seg6-lwt.patch

new file mode 100644 (file)

index 0000000..856ebf0
--- /dev/null
+++ b/queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-seg6-lwt.patch
@@ -0,0 +1,60 @@
+From 930eea7dd45aac86e88ea450e3a5be502d409592 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Feb 2025 18:51:38 +0100
+Subject: net: ipv6: fix dst ref loop on input in seg6 lwt
+
+From: Justin Iurman <justin.iurman@uliege.be>
+
+[ Upstream commit c64a0727f9b1cbc63a5538c8c0014e9a175ad864 ]
+
+Prevent a dst ref loop on input in seg6_iptunnel.
+
+Fixes: af4a2209b134 ("ipv6: sr: use dst_cache in seg6_input")
+Cc: David Lebrun <dlebrun@google.com>
+Cc: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/seg6_iptunnel.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
+index c161298c8b335..b186d85ec5b3f 100644
+--- a/net/ipv6/seg6_iptunnel.c
++++ b/net/ipv6/seg6_iptunnel.c
+@@ -472,10 +472,18 @@ static int seg6_input_core(struct net *net, struct sock *sk,
+ {
+       struct dst_entry *orig_dst = skb_dst(skb);
+       struct dst_entry *dst = NULL;
++      struct lwtunnel_state *lwtst;
+       struct seg6_lwt *slwt;
+       int err;
+ 
+-      slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
++      /* We cannot dereference "orig_dst" once ip6_route_input() or
++       * skb_dst_drop() is called. However, in order to detect a dst loop, we
++       * need the address of its lwtstate. So, save the address of lwtstate
++       * now and use it later as a comparison.
++       */
++      lwtst = orig_dst->lwtstate;
++
++      slwt = seg6_lwt_lwtunnel(lwtst);
+ 
+       local_bh_disable();
+       dst = dst_cache_get(&slwt->cache);
+@@ -490,7 +498,9 @@ static int seg6_input_core(struct net *net, struct sock *sk,
+       if (!dst) {
+               ip6_route_input(skb);
+               dst = skb_dst(skb);
+-              if (!dst->error) {
++
++              /* cache only if we don't create a dst reference loop */
++              if (!dst->error && lwtst != dst->lwtstate) {
+                       local_bh_disable();
+                       dst_cache_set_ip6(&slwt->cache, dst,
+                                         &ipv6_hdr(skb)->saddr);
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-ipv6-rpl_iptunnel-mitigate-2-realloc-issue.patch b/queue-6.1/net-ipv6-rpl_iptunnel-mitigate-2-realloc-issue.patch

new file mode 100644 (file)

index 0000000..35e4422
--- /dev/null
+++ b/queue-6.1/net-ipv6-rpl_iptunnel-mitigate-2-realloc-issue.patch
@@ -0,0 +1,155 @@
+From 06016721218287aa40735e5ef106176afd3ef85a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Dec 2024 13:49:45 +0100
+Subject: net: ipv6: rpl_iptunnel: mitigate 2-realloc issue
+
+From: Justin Iurman <justin.iurman@uliege.be>
+
+[ Upstream commit 985ec6f5e6235242191370628acb73d7a9f0c0ea ]
+
+This patch mitigates the two-reallocations issue with rpl_iptunnel by
+providing the dst_entry (in the cache) to the first call to
+skb_cow_head(). As a result, the very first iteration would still
+trigger two reallocations (i.e., empty cache), while next iterations
+would only trigger a single reallocation.
+
+Performance tests before/after applying this patch clearly show that
+there is no negative impact (they even show an improvement):
+- before: https://ibb.co/nQJhqwc
+- after: https://ibb.co/4ZvW6wV
+
+Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
+Cc: Alexander Aring <aahringo@redhat.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 13e55fbaec17 ("net: ipv6: fix dst ref loop on input in rpl lwt")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/rpl_iptunnel.c | 46 ++++++++++++++++++++++-------------------
+ 1 file changed, 25 insertions(+), 21 deletions(-)
+
+diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
+index c1d0f947a7c87..69b9bd90140dd 100644
+--- a/net/ipv6/rpl_iptunnel.c
++++ b/net/ipv6/rpl_iptunnel.c
+@@ -125,7 +125,8 @@ static void rpl_destroy_state(struct lwtunnel_state *lwt)
+ }
+ 
+ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
+-                           const struct ipv6_rpl_sr_hdr *srh)
++                           const struct ipv6_rpl_sr_hdr *srh,
++                           struct dst_entry *cache_dst)
+ {
+       struct ipv6_rpl_sr_hdr *isrh, *csrh;
+       const struct ipv6hdr *oldhdr;
+@@ -153,7 +154,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
+ 
+       hdrlen = ((csrh->hdrlen + 1) << 3);
+ 
+-      err = skb_cow_head(skb, hdrlen + skb->mac_len);
++      err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
+       if (unlikely(err)) {
+               kfree(buf);
+               return err;
+@@ -186,7 +187,8 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
+       return 0;
+ }
+ 
+-static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
++static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt,
++                    struct dst_entry *cache_dst)
+ {
+       struct dst_entry *dst = skb_dst(skb);
+       struct rpl_iptunnel_encap *tinfo;
+@@ -196,7 +198,7 @@ static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
+ 
+       tinfo = rpl_encap_lwtunnel(dst->lwtstate);
+ 
+-      return rpl_do_srh_inline(skb, rlwt, tinfo->srh);
++      return rpl_do_srh_inline(skb, rlwt, tinfo->srh, cache_dst);
+ }
+ 
+ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+@@ -208,14 +210,14 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ 
+       rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
+ 
+-      err = rpl_do_srh(skb, rlwt);
+-      if (unlikely(err))
+-              goto drop;
+-
+       local_bh_disable();
+       dst = dst_cache_get(&rlwt->cache);
+       local_bh_enable();
+ 
++      err = rpl_do_srh(skb, rlwt, dst);
++      if (unlikely(err))
++              goto drop;
++
+       if (unlikely(!dst)) {
+               struct ipv6hdr *hdr = ipv6_hdr(skb);
+               struct flowi6 fl6;
+@@ -237,15 +239,15 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+               local_bh_disable();
+               dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
+               local_bh_enable();
++
++              err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
++              if (unlikely(err))
++                      goto drop;
+       }
+ 
+       skb_dst_drop(skb);
+       skb_dst_set(skb, dst);
+ 
+-      err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+-      if (unlikely(err))
+-              goto drop;
+-
+       return dst_output(net, sk, skb);
+ 
+ drop:
+@@ -262,12 +264,13 @@ static int rpl_input(struct sk_buff *skb)
+ 
+       rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
+ 
+-      err = rpl_do_srh(skb, rlwt);
+-      if (unlikely(err))
+-              goto drop;
+-
+       local_bh_disable();
+       dst = dst_cache_get(&rlwt->cache);
++      local_bh_enable();
++
++      err = rpl_do_srh(skb, rlwt, dst);
++      if (unlikely(err))
++              goto drop;
+ 
+       skb_dst_drop(skb);
+ 
+@@ -275,17 +278,18 @@ static int rpl_input(struct sk_buff *skb)
+               ip6_route_input(skb);
+               dst = skb_dst(skb);
+               if (!dst->error) {
++                      local_bh_disable();
+                       dst_cache_set_ip6(&rlwt->cache, dst,
+                                         &ipv6_hdr(skb)->saddr);
++                      local_bh_enable();
+               }
++
++              err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
++              if (unlikely(err))
++                      goto drop;
+       } else {
+               skb_dst_set(skb, dst);
+       }
+-      local_bh_enable();
+-
+-      err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+-      if (unlikely(err))
+-              goto drop;
+ 
+       return dst_input(skb);
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-ipv6-seg6_iptunnel-mitigate-2-realloc-issue.patch b/queue-6.1/net-ipv6-seg6_iptunnel-mitigate-2-realloc-issue.patch

new file mode 100644 (file)

index 0000000..830cc94
--- /dev/null
+++ b/queue-6.1/net-ipv6-seg6_iptunnel-mitigate-2-realloc-issue.patch
@@ -0,0 +1,255 @@
+From ce40c0dcf493d7ab3793ef8271dd62197a32bc24 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Dec 2024 13:49:44 +0100
+Subject: net: ipv6: seg6_iptunnel: mitigate 2-realloc issue
+
+From: Justin Iurman <justin.iurman@uliege.be>
+
+[ Upstream commit 40475b63761abb6f8fdef960d03228a08662c9c4 ]
+
+This patch mitigates the two-reallocations issue with seg6_iptunnel by
+providing the dst_entry (in the cache) to the first call to
+skb_cow_head(). As a result, the very first iteration would still
+trigger two reallocations (i.e., empty cache), while next iterations
+would only trigger a single reallocation.
+
+Performance tests before/after applying this patch clearly show
+the improvement:
+- before: https://ibb.co/3Cg4sNH
+- after: https://ibb.co/8rQ350r
+
+Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
+Cc: David Lebrun <dlebrun@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: c64a0727f9b1 ("net: ipv6: fix dst ref loop on input in seg6 lwt")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/seg6_iptunnel.c | 85 ++++++++++++++++++++++++----------------
+ 1 file changed, 52 insertions(+), 33 deletions(-)
+
+diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
+index ae5299c277bcf..c161298c8b335 100644
+--- a/net/ipv6/seg6_iptunnel.c
++++ b/net/ipv6/seg6_iptunnel.c
+@@ -124,8 +124,8 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
+       return flowlabel;
+ }
+ 
+-/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
+-int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
++static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
++                             int proto, struct dst_entry *cache_dst)
+ {
+       struct dst_entry *dst = skb_dst(skb);
+       struct net *net = dev_net(dst->dev);
+@@ -137,7 +137,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+       hdrlen = (osrh->hdrlen + 1) << 3;
+       tot_len = hdrlen + sizeof(*hdr);
+ 
+-      err = skb_cow_head(skb, tot_len + skb->mac_len);
++      err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
+       if (unlikely(err))
+               return err;
+ 
+@@ -197,11 +197,18 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+ 
+       return 0;
+ }
++
++/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
++int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
++{
++      return __seg6_do_srh_encap(skb, osrh, proto, NULL);
++}
+ EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
+ 
+ /* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */
+ static int seg6_do_srh_encap_red(struct sk_buff *skb,
+-                               struct ipv6_sr_hdr *osrh, int proto)
++                               struct ipv6_sr_hdr *osrh, int proto,
++                               struct dst_entry *cache_dst)
+ {
+       __u8 first_seg = osrh->first_segment;
+       struct dst_entry *dst = skb_dst(skb);
+@@ -230,7 +237,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb,
+ 
+       tot_len = red_hdrlen + sizeof(struct ipv6hdr);
+ 
+-      err = skb_cow_head(skb, tot_len + skb->mac_len);
++      err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
+       if (unlikely(err))
+               return err;
+ 
+@@ -317,8 +324,8 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb,
+       return 0;
+ }
+ 
+-/* insert an SRH within an IPv6 packet, just after the IPv6 header */
+-int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
++static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
++                              struct dst_entry *cache_dst)
+ {
+       struct ipv6hdr *hdr, *oldhdr;
+       struct ipv6_sr_hdr *isrh;
+@@ -326,7 +333,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+ 
+       hdrlen = (osrh->hdrlen + 1) << 3;
+ 
+-      err = skb_cow_head(skb, hdrlen + skb->mac_len);
++      err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
+       if (unlikely(err))
+               return err;
+ 
+@@ -369,9 +376,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+ 
+       return 0;
+ }
+-EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
+ 
+-static int seg6_do_srh(struct sk_buff *skb)
++static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
+ {
+       struct dst_entry *dst = skb_dst(skb);
+       struct seg6_iptunnel_encap *tinfo;
+@@ -384,7 +390,7 @@ static int seg6_do_srh(struct sk_buff *skb)
+               if (skb->protocol != htons(ETH_P_IPV6))
+                       return -EINVAL;
+ 
+-              err = seg6_do_srh_inline(skb, tinfo->srh);
++              err = __seg6_do_srh_inline(skb, tinfo->srh, cache_dst);
+               if (err)
+                       return err;
+               break;
+@@ -402,9 +408,11 @@ static int seg6_do_srh(struct sk_buff *skb)
+                       return -EINVAL;
+ 
+               if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP)
+-                      err = seg6_do_srh_encap(skb, tinfo->srh, proto);
++                      err = __seg6_do_srh_encap(skb, tinfo->srh,
++                                                proto, cache_dst);
+               else
+-                      err = seg6_do_srh_encap_red(skb, tinfo->srh, proto);
++                      err = seg6_do_srh_encap_red(skb, tinfo->srh,
++                                                  proto, cache_dst);
+ 
+               if (err)
+                       return err;
+@@ -425,11 +433,13 @@ static int seg6_do_srh(struct sk_buff *skb)
+               skb_push(skb, skb->mac_len);
+ 
+               if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP)
+-                      err = seg6_do_srh_encap(skb, tinfo->srh,
+-                                              IPPROTO_ETHERNET);
++                      err = __seg6_do_srh_encap(skb, tinfo->srh,
++                                                IPPROTO_ETHERNET,
++                                                cache_dst);
+               else
+                       err = seg6_do_srh_encap_red(skb, tinfo->srh,
+-                                                  IPPROTO_ETHERNET);
++                                                  IPPROTO_ETHERNET,
++                                                  cache_dst);
+ 
+               if (err)
+                       return err;
+@@ -444,6 +454,13 @@ static int seg6_do_srh(struct sk_buff *skb)
+       return 0;
+ }
+ 
++/* insert an SRH within an IPv6 packet, just after the IPv6 header */
++int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
++{
++      return __seg6_do_srh_inline(skb, osrh, NULL);
++}
++EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
++
+ static int seg6_input_finish(struct net *net, struct sock *sk,
+                            struct sk_buff *skb)
+ {
+@@ -458,14 +475,15 @@ static int seg6_input_core(struct net *net, struct sock *sk,
+       struct seg6_lwt *slwt;
+       int err;
+ 
+-      err = seg6_do_srh(skb);
+-      if (unlikely(err))
+-              goto drop;
+-
+       slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+ 
+       local_bh_disable();
+       dst = dst_cache_get(&slwt->cache);
++      local_bh_enable();
++
++      err = seg6_do_srh(skb, dst);
++      if (unlikely(err))
++              goto drop;
+ 
+       skb_dst_drop(skb);
+ 
+@@ -473,17 +491,18 @@ static int seg6_input_core(struct net *net, struct sock *sk,
+               ip6_route_input(skb);
+               dst = skb_dst(skb);
+               if (!dst->error) {
++                      local_bh_disable();
+                       dst_cache_set_ip6(&slwt->cache, dst,
+                                         &ipv6_hdr(skb)->saddr);
++                      local_bh_enable();
+               }
++
++              err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
++              if (unlikely(err))
++                      goto drop;
+       } else {
+               skb_dst_set(skb, dst);
+       }
+-      local_bh_enable();
+-
+-      err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+-      if (unlikely(err))
+-              goto drop;
+ 
+       if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+               return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+@@ -529,16 +548,16 @@ static int seg6_output_core(struct net *net, struct sock *sk,
+       struct seg6_lwt *slwt;
+       int err;
+ 
+-      err = seg6_do_srh(skb);
+-      if (unlikely(err))
+-              goto drop;
+-
+       slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+ 
+       local_bh_disable();
+       dst = dst_cache_get(&slwt->cache);
+       local_bh_enable();
+ 
++      err = seg6_do_srh(skb, dst);
++      if (unlikely(err))
++              goto drop;
++
+       if (unlikely(!dst)) {
+               struct ipv6hdr *hdr = ipv6_hdr(skb);
+               struct flowi6 fl6;
+@@ -560,15 +579,15 @@ static int seg6_output_core(struct net *net, struct sock *sk,
+               local_bh_disable();
+               dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+               local_bh_enable();
++
++              err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
++              if (unlikely(err))
++                      goto drop;
+       }
+ 
+       skb_dst_drop(skb);
+       skb_dst_set(skb, dst);
+ 
+-      err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+-      if (unlikely(err))
+-              goto drop;
+-
+       if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+               return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
+                              NULL, skb_dst(skb)->dev, dst_output);
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-loopback-avoid-sending-ip-packets-without-an-eth.patch b/queue-6.1/net-loopback-avoid-sending-ip-packets-without-an-eth.patch

new file mode 100644 (file)

index 0000000..eb61730
--- /dev/null
+++ b/queue-6.1/net-loopback-avoid-sending-ip-packets-without-an-eth.patch
@@ -0,0 +1,94 @@
+From ec4f8ab14ccc843b21181b75e9f9b0f454be3f8a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Feb 2025 09:25:59 +0200
+Subject: net: loopback: Avoid sending IP packets without an Ethernet header
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 0e4427f8f587c4b603475468bb3aee9418574893 ]
+
+After commit 22600596b675 ("ipv4: give an IPv4 dev to blackhole_netdev")
+IPv4 neighbors can be constructed on the blackhole net device, but they
+are constructed with an output function (neigh_direct_output()) that
+simply calls dev_queue_xmit(). The latter will transmit packets via
+'skb->dev' which might not be the blackhole net device if dst_dev_put()
+switched 'dst->dev' to the blackhole net device while another CPU was
+using the dst entry in ip_output(), but after it already initialized
+'skb->dev' from 'dst->dev'.
+
+Specifically, the following can happen:
+
+    CPU1                                      CPU2
+
+udp_sendmsg(sk1)                          udp_sendmsg(sk2)
+udp_send_skb()                            [...]
+ip_output()
+    skb->dev = skb_dst(skb)->dev
+                                          dst_dev_put()
+                                              dst->dev = blackhole_netdev
+ip_finish_output2()
+    resolves neigh on dst->dev
+neigh_output()
+neigh_direct_output()
+dev_queue_xmit()
+
+This will result in IPv4 packets being sent without an Ethernet header
+via a valid net device:
+
+tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
+listening on enp9s0, link-type EN10MB (Ethernet), snapshot length 262144 bytes
+22:07:02.329668 20:00:40:11:18:fb > 45:00:00:44:f4:94, ethertype Unknown
+(0x58c6), length 68:
+        0x0000:  8dda 74ca f1ae ca6c ca6c 0098 969c 0400  ..t....l.l......
+        0x0010:  0000 4730 3f18 6800 0000 0000 0000 9971  ..G0?.h........q
+        0x0020:  c4c9 9055 a157 0a70 9ead bf83 38ca ab38  ...U.W.p....8..8
+        0x0030:  8add ab96 e052                           .....R
+
+Fix by making sure that neighbors are constructed on top of the
+blackhole net device with an output function that simply consumes the
+packets, in a similar fashion to dst_discard_out() and
+blackhole_netdev_xmit().
+
+Fixes: 8d7017fd621d ("blackhole_netdev: use blackhole_netdev to invalidate dst entries")
+Fixes: 22600596b675 ("ipv4: give an IPv4 dev to blackhole_netdev")
+Reported-by: Florian Meister <fmei@sfs.com>
+Closes: https://lore.kernel.org/netdev/20250210084931.23a5c2e4@hermes.local/
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20250220072559.782296-1-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/loopback.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
+index 2e9742952c4e9..b213397672d22 100644
+--- a/drivers/net/loopback.c
++++ b/drivers/net/loopback.c
+@@ -246,8 +246,22 @@ static netdev_tx_t blackhole_netdev_xmit(struct sk_buff *skb,
+       return NETDEV_TX_OK;
+ }
+ 
++static int blackhole_neigh_output(struct neighbour *n, struct sk_buff *skb)
++{
++      kfree_skb(skb);
++      return 0;
++}
++
++static int blackhole_neigh_construct(struct net_device *dev,
++                                   struct neighbour *n)
++{
++      n->output = blackhole_neigh_output;
++      return 0;
++}
++
+ static const struct net_device_ops blackhole_netdev_ops = {
+       .ndo_start_xmit = blackhole_netdev_xmit,
++      .ndo_neigh_construct = blackhole_neigh_construct,
+ };
+ 
+ /* This is a dst-dummy device used specifically for invalidated
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-mlx5-irq-fix-null-string-in-debug-print.patch b/queue-6.1/net-mlx5-irq-fix-null-string-in-debug-print.patch

new file mode 100644 (file)

index 0000000..9f57411
--- /dev/null
+++ b/queue-6.1/net-mlx5-irq-fix-null-string-in-debug-print.patch
@@ -0,0 +1,42 @@
+From 9d779a68a89614f61d7795a6a03b3cf510a7dae8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Feb 2025 09:26:08 +0200
+Subject: net/mlx5: IRQ, Fix null string in debug print
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 2f5a6014eb168a97b24153adccfa663d3b282767 ]
+
+irq_pool_alloc() debug print can print a null string.
+Fix it by providing a default string to print.
+
+Fixes: 71e084e26414 ("net/mlx5: Allocating a pool of MSI-X vectors for SFs")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202501141055.SwfIphN0-lkp@intel.com/
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+Link: https://patch.msgid.link/20250225072608.526866-4-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+index a6d3fc96e1685..10b9dc2aaf06f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+@@ -513,7 +513,7 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
+       pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
+       pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
+       mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
+-                    name, size, start);
++                    name ? name : "mlx5_pcif_pool", size, start);
+       return pool;
+ }
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-mvpp2-cls-fixed-non-ip-flow-with-vlan-tag-flow-d.patch b/queue-6.1/net-mvpp2-cls-fixed-non-ip-flow-with-vlan-tag-flow-d.patch

new file mode 100644 (file)

index 0000000..26dfb30
--- /dev/null
+++ b/queue-6.1/net-mvpp2-cls-fixed-non-ip-flow-with-vlan-tag-flow-d.patch
@@ -0,0 +1,40 @@
+From 0ab9b9d1325124e867cfa43f9acb1a500ed865b0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Feb 2025 20:20:58 -0800
+Subject: net: mvpp2: cls: Fixed Non IP flow, with vlan tag flow definition.
+
+From: Harshal Chaudhari <hchaudhari@marvell.com>
+
+[ Upstream commit 2d253726ff7106b39a44483b6864398bba8a2f74 ]
+
+A non-IP flow with a VLAN tag does not work as expected when
+running the command below for vlan-priority. Fix that.
+
+ethtool -N eth1 flow-type ether vlan 0x8000 vlan-mask 0x1fff action 0 loc 0
+
+Fixes: 1274daede3ef ("net: mvpp2: cls: Add steering based on vlan Id and priority.")
+Signed-off-by: Harshal Chaudhari <hchaudhari@marvell.com>
+Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
+Link: https://patch.msgid.link/20250225042058.2643838-1-hchaudhari@marvell.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+index 40aeaa7bd739f..d2757cc116139 100644
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+@@ -324,7 +324,7 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
+                      MVPP2_PRS_RI_VLAN_MASK),
+       /* Non IP flow, with vlan tag */
+       MVPP2_DEF_FLOW(MVPP22_FLOW_ETHERNET, MVPP2_FL_NON_IP_TAG,
+-                     MVPP22_CLS_HEK_OPT_VLAN,
++                     MVPP22_CLS_HEK_TAGGED,
+                      0, 0),
+ };
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-set-the-minimum-for-net_hotdata.netdev_budget_us.patch b/queue-6.1/net-set-the-minimum-for-net_hotdata.netdev_budget_us.patch

new file mode 100644 (file)

index 0000000..e212771
--- /dev/null
+++ b/queue-6.1/net-set-the-minimum-for-net_hotdata.netdev_budget_us.patch
@@ -0,0 +1,58 @@
+From b1d4a55ec10fb6d2c11e13f89f1816e027267147 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Feb 2025 12:07:52 +0100
+Subject: net: set the minimum for net_hotdata.netdev_budget_usecs
+
+From: Jiri Slaby (SUSE) <jirislaby@kernel.org>
+
+[ Upstream commit c180188ec02281126045414e90d08422a80f75b4 ]
+
+Commit 7acf8a1e8a28 ("Replace 2 jiffies with sysctl netdev_budget_usecs
+to enable softirq tuning") added a possibility to set
+net_hotdata.netdev_budget_usecs, but added no lower bound checking.
+
+Commit a4837980fd9f ("net: revert default NAPI poll timeout to 2 jiffies")
+made the *initial* value HZ-dependent, so the initial value is at least
+2 jiffies even for lower HZ values (2 ms for 1000 Hz, 8ms for 250 Hz, 20
+ms for 100 Hz).
+
+But a user still can set improper values by a sysctl. Set .extra1
+(the lower bound) for net_hotdata.netdev_budget_usecs to the same value
+as in the latter commit. That is to 2 jiffies.
+
+Fixes: a4837980fd9f ("net: revert default NAPI poll timeout to 2 jiffies")
+Fixes: 7acf8a1e8a28 ("Replace 2 jiffies with sysctl netdev_budget_usecs to enable softirq tuning")
+Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
+Cc: Dmitry Yakunin <zeil@yandex-team.ru>
+Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Link: https://patch.msgid.link/20250220110752.137639-1-jirislaby@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sysctl_net_core.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
+index 47ca6d3ddbb56..75efc712bb9bc 100644
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -30,6 +30,7 @@ static int min_sndbuf = SOCK_MIN_SNDBUF;
+ static int min_rcvbuf = SOCK_MIN_RCVBUF;
+ static int max_skb_frags = MAX_SKB_FRAGS;
+ static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;
++static int netdev_budget_usecs_min = 2 * USEC_PER_SEC / HZ;
+ 
+ static int net_msg_warn;      /* Unused, but still a sysctl */
+ 
+@@ -554,7 +555,7 @@ static struct ctl_table net_core_table[] = {
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+-              .extra1         = SYSCTL_ZERO,
++              .extra1         = &netdev_budget_usecs_min,
+       },
+       {
+               .procname       = "fb_tunnels_only_for_init_net",
+-- 
+2.39.5
+
diff --git a/queue-6.1/series b/queue-6.1/series

index 64540815b6fbdb3edf0c48b0ef00636238c62c9d..f4af24f3e23715de0681050b22b253067d91942f 100644 (file)
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -113,3 +113,30 @@ rdma-mlx5-fix-ah-static-rate-parsing.patch
  scsi-core-clear-driver-private-data-when-retrying-re.patch
  rdma-mlx5-fix-bind-qp-error-cleanup-flow.patch
  sunrpc-suppress-warnings-for-unused-procfs-functions.patch
+alsa-usb-audio-avoid-dropping-midi-events-at-closing.patch
+bluetooth-l2cap-fix-l2cap_ecred_conn_rsp-response.patch
+afs-remove-variable-nr_servers.patch
+afs-make-it-possible-to-find-the-volumes-that-are-us.patch
+afs-fix-the-server_list-to-unuse-a-displaced-server-.patch
+net-loopback-avoid-sending-ip-packets-without-an-eth.patch
+net-set-the-minimum-for-net_hotdata.netdev_budget_us.patch
+net-ipv4-add-tracepoint-for-icmp_send.patch
+ipv4-icmp-pass-full-ds-field-to-ip_route_input.patch
+ipv4-icmp-unmask-upper-dscp-bits-in-icmp_route_looku.patch
+ipvlan-unmask-upper-dscp-bits-in-ipvlan_process_v4_o.patch
+ipv4-convert-icmp_route_lookup-to-dscp_t.patch
+ipv4-convert-ip_route_input-to-dscp_t.patch
+ipvlan-prepare-ipvlan_process_v4_outbound-to-future-.patch
+ipvlan-ensure-network-headers-are-in-skb-linear-part.patch
+net-cadence-macb-synchronize-stats-calculations.patch
+asoc-es8328-fix-route-from-dac-to-output.patch
+ipvs-always-clear-ipvs_property-flag-in-skb_scrub_pa.patch
+tcp-defer-ts_recent-changes-until-req-is-owned.patch
+net-clear-old-fragment-checksum-value-in-napi_reuse_.patch
+net-mvpp2-cls-fixed-non-ip-flow-with-vlan-tag-flow-d.patch
+net-mlx5-irq-fix-null-string-in-debug-print.patch
+include-net-add-static-inline-dst_dev_overhead-to-ds.patch
+net-ipv6-seg6_iptunnel-mitigate-2-realloc-issue.patch
+net-ipv6-fix-dst-ref-loop-on-input-in-seg6-lwt.patch
+net-ipv6-rpl_iptunnel-mitigate-2-realloc-issue.patch
+net-ipv6-fix-dst-ref-loop-on-input-in-rpl-lwt.patch
diff --git a/queue-6.1/tcp-defer-ts_recent-changes-until-req-is-owned.patch b/queue-6.1/tcp-defer-ts_recent-changes-until-req-is-owned.patch

new file mode 100644 (file)

index 0000000..1681253
--- /dev/null
+++ b/queue-6.1/tcp-defer-ts_recent-changes-until-req-is-owned.patch
@@ -0,0 +1,92 @@
+From 5fc9d620bffc5b5e9d907f4afc4987ef50ddef8e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Feb 2025 17:00:47 +0800
+Subject: tcp: Defer ts_recent changes until req is owned
+
+From: Wang Hai <wanghai38@huawei.com>
+
+[ Upstream commit 8d52da23b6c68a0f6bad83959ebb61a2cf623c4e ]
+
+Recently a bug was discovered where the server had entered TCP_ESTABLISHED
+state, but the upper layers were not notified.
+
+The same 5-tuple packet may be processed by different CPUs, so two
+CPUs may receive different ack packets at the same time when the
+state is TCP_NEW_SYN_RECV.
+
+In that case, req->ts_recent in tcp_check_req may be changed concurrently,
+which will probably cause the newsk's ts_recent to be incorrectly large.
+So that tcp_validate_incoming will fail. At this point, newsk will not be
+able to enter the TCP_ESTABLISHED.
+
+cpu1                                    cpu2
+tcp_check_req
+                                        tcp_check_req
+ req->ts_recent = rcv_tsval = t1
+                                         req->ts_recent = rcv_tsval = t2
+
+ syn_recv_sock
+  tcp_sk(child)->rx_opt.ts_recent = req->ts_recent = t2 // t1 < t2
+tcp_child_process
+ tcp_rcv_state_process
+  tcp_validate_incoming
+   tcp_paws_check
+    if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
+        // t2 - t1 > paws_win, failed
+                                        tcp_v4_do_rcv
+                                         tcp_rcv_state_process
+                                         // TCP_ESTABLISHED
+
+The cpu2's skb or a newly received skb will call tcp_v4_do_rcv to get
+the newsk into the TCP_ESTABLISHED state, but at this point it is no
+longer possible to notify the upper layer application. A notification
+mechanism could be added here, but the fix is more complex, so the
+current fix is used.
+
+In tcp_check_req, req->ts_recent is used to assign a value to
+tcp_sk(child)->rx_opt.ts_recent, so removing the change in req->ts_recent
+and changing tcp_sk(child)->rx_opt.ts_recent directly after owning the
+req fixes this bug.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Wang Hai <wanghai38@huawei.com>
+Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_minisocks.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
+index c562cb965e742..bc94df0140bfd 100644
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -735,12 +735,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
+ 
+       /* In sequence, PAWS is OK. */
+ 
+-      /* TODO: We probably should defer ts_recent change once
+-       * we take ownership of @req.
+-       */
+-      if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
+-              WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval);
+-
+       if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
+               /* Truncate SYN, it is out of window starting
+                  at tcp_rsk(req)->rcv_isn + 1. */
+@@ -789,6 +783,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
+       if (!child)
+               goto listen_overflow;
+ 
++      if (own_req && tmp_opt.saw_tstamp &&
++          !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
++              tcp_sk(child)->rx_opt.ts_recent = tmp_opt.rcv_tsval;
++
+       if (own_req && rsk_drop_req(req)) {
+               reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
+               inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
+-- 
+2.39.5
+
author	Sasha Levin <sashal@kernel.org>
	Sat, 1 Mar 2025 14:19:53 +0000 (09:19 -0500)
committer	Sasha Levin <sashal@kernel.org>
	Sat, 1 Mar 2025 14:19:53 +0000 (09:19 -0500)
queue-6.1/afs-fix-the-server_list-to-unuse-a-displaced-server-.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/afs-make-it-possible-to-find-the-volumes-that-are-us.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/afs-remove-variable-nr_servers.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/alsa-usb-audio-avoid-dropping-midi-events-at-closing.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/asoc-es8328-fix-route-from-dac-to-output.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/bluetooth-l2cap-fix-l2cap_ecred_conn_rsp-response.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/include-net-add-static-inline-dst_dev_overhead-to-ds.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/ipv4-convert-icmp_route_lookup-to-dscp_t.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/ipv4-convert-ip_route_input-to-dscp_t.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/ipv4-icmp-pass-full-ds-field-to-ip_route_input.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/ipv4-icmp-unmask-upper-dscp-bits-in-icmp_route_looku.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/ipvlan-ensure-network-headers-are-in-skb-linear-part.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/ipvlan-prepare-ipvlan_process_v4_outbound-to-future-.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/ipvlan-unmask-upper-dscp-bits-in-ipvlan_process_v4_o.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/ipvs-always-clear-ipvs_property-flag-in-skb_scrub_pa.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/net-cadence-macb-synchronize-stats-calculations.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/net-clear-old-fragment-checksum-value-in-napi_reuse_.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/net-ipv4-add-tracepoint-for-icmp_send.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-rpl-lwt.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-seg6-lwt.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/net-ipv6-rpl_iptunnel-mitigate-2-realloc-issue.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/net-ipv6-seg6_iptunnel-mitigate-2-realloc-issue.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/net-loopback-avoid-sending-ip-packets-without-an-eth.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/net-mlx5-irq-fix-null-string-in-debug-print.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/net-mvpp2-cls-fixed-non-ip-flow-with-vlan-tag-flow-d.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/net-set-the-minimum-for-net_hotdata.netdev_budget_us.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/series		patch \| blob \| blame \| history
queue-6.1/tcp-defer-ts_recent-changes-until-req-is-owned.patch	[new file with mode: 0644]	patch \| blob