From: Sasha Levin Date: Sat, 1 Mar 2025 14:19:53 +0000 (-0500) Subject: Fixes for 6.1 X-Git-Tag: v6.6.81~44 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=128c06fc94a8480a90638d3037bd28396a58e9e9;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/afs-fix-the-server_list-to-unuse-a-displaced-server-.patch b/queue-6.1/afs-fix-the-server_list-to-unuse-a-displaced-server-.patch new file mode 100644 index 0000000000..f6df08c76b --- /dev/null +++ b/queue-6.1/afs-fix-the-server_list-to-unuse-a-displaced-server-.patch @@ -0,0 +1,59 @@ +From 1ba4f8c739fc4511f1da171793ba63861980e61b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Feb 2025 19:22:47 +0000 +Subject: afs: Fix the server_list to unuse a displaced server rather than + putting it + +From: David Howells + +[ Upstream commit add117e48df4788a86a21bd0515833c0a6db1ad1 ] + +When allocating and building an afs_server_list struct object from a VLDB +record, we look up each server address to get the server record for it - +but a server may have more than one entry in the record and we discard the +duplicate pointers.  Currently, however, when we discard, we only put a +server record, not unuse it - but the lookup got us an active-user count. + +The active-user count on an afs_server_list object determines its lifetime +whereas the refcount keeps the memory backing it around.  Failing to reduce +the active-user counter prevents the record from being cleaned up and can +lead to multiple copies being seen - and pointing to deleted afs_cell +objects and other such things. + +Fix this by switching the incorrect 'put' to an 'unuse' instead. + +Without this, occasionally, a dead server record can be seen in +/proc/net/afs/servers and list corruption may be observed: + + list_del corruption. prev->next should be ffff888102423e40, but was 0000000000000000. 
(prev=ffff88810140cd38) + +Fixes: 977e5f8ed0ab ("afs: Split the usage count on struct afs_server") +Signed-off-by: David Howells +cc: Marc Dionne +cc: Simon Horman +cc: linux-afs@lists.infradead.org +Link: https://patch.msgid.link/20250218192250.296870-5-dhowells@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + fs/afs/server_list.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c +index 4d6369477f54e..89c75d934f79e 100644 +--- a/fs/afs/server_list.c ++++ b/fs/afs/server_list.c +@@ -67,8 +67,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume, + break; + if (j < slist->nr_servers) { + if (slist->servers[j].server == server) { +- afs_put_server(volume->cell->net, server, +- afs_server_trace_put_slist_isort); ++ afs_unuse_server(volume->cell->net, server, ++ afs_server_trace_put_slist_isort); + continue; + } + +-- +2.39.5 + diff --git a/queue-6.1/afs-make-it-possible-to-find-the-volumes-that-are-us.patch b/queue-6.1/afs-make-it-possible-to-find-the-volumes-that-are-us.patch new file mode 100644 index 0000000000..a007a80b10 --- /dev/null +++ b/queue-6.1/afs-make-it-possible-to-find-the-volumes-that-are-us.patch @@ -0,0 +1,417 @@ +From 61857ffa217b971dad62722eddd225e93da7c12f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Nov 2023 16:08:43 +0000 +Subject: afs: Make it possible to find the volumes that are using a server + +From: David Howells + +[ Upstream commit ca0e79a46097d54e4af46c67c852479d97af35bb ] + +Make it possible to find the afs_volume structs that are using an +afs_server struct to aid in breaking volume callbacks. + +The way this is done is that each afs_volume already has an array of +afs_server_entry records that point to the servers where that volume might +be found. 
An afs_volume backpointer and a list node is added to each entry +and each entry is then added to an RCU-traversable list on the afs_server +to which it points. + +Signed-off-by: David Howells +cc: Marc Dionne +cc: linux-afs@lists.infradead.org +Stable-dep-of: add117e48df4 ("afs: Fix the server_list to unuse a displaced server rather than putting it") +Signed-off-by: Sasha Levin +--- + fs/afs/cell.c | 1 + + fs/afs/internal.h | 23 +++++---- + fs/afs/server.c | 1 + + fs/afs/server_list.c | 112 +++++++++++++++++++++++++++++++++++++++---- + fs/afs/vl_alias.c | 2 +- + fs/afs/volume.c | 36 ++++++++------ + 6 files changed, 143 insertions(+), 32 deletions(-) + +diff --git a/fs/afs/cell.c b/fs/afs/cell.c +index 926cb1188eba6..7c0dce8eecadd 100644 +--- a/fs/afs/cell.c ++++ b/fs/afs/cell.c +@@ -161,6 +161,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, + refcount_set(&cell->ref, 1); + atomic_set(&cell->active, 0); + INIT_WORK(&cell->manager, afs_manage_cell_work); ++ spin_lock_init(&cell->vs_lock); + cell->volumes = RB_ROOT; + INIT_HLIST_HEAD(&cell->proc_volumes); + seqlock_init(&cell->volume_lock); +diff --git a/fs/afs/internal.h b/fs/afs/internal.h +index 097d5a5f07b1a..fd4310272ccc1 100644 +--- a/fs/afs/internal.h ++++ b/fs/afs/internal.h +@@ -378,6 +378,7 @@ struct afs_cell { + unsigned int debug_id; + + /* The volumes belonging to this cell */ ++ spinlock_t vs_lock; /* Lock for server->volumes */ + struct rb_root volumes; /* Tree of volumes on this server */ + struct hlist_head proc_volumes; /* procfs volume list */ + seqlock_t volume_lock; /* For volumes */ +@@ -501,6 +502,7 @@ struct afs_server { + struct hlist_node addr4_link; /* Link in net->fs_addresses4 */ + struct hlist_node addr6_link; /* Link in net->fs_addresses6 */ + struct hlist_node proc_link; /* Link in net->fs_proc */ ++ struct list_head volumes; /* RCU list of afs_server_entry objects */ + struct work_struct initcb_work; /* Work for CB.InitCallBackState* */ + struct afs_server *gc_next; 
/* Next server in manager's list */ + time64_t unuse_time; /* Time at which last unused */ +@@ -549,12 +551,14 @@ struct afs_server { + */ + struct afs_server_entry { + struct afs_server *server; ++ struct afs_volume *volume; ++ struct list_head slink; /* Link in server->volumes */ + }; + + struct afs_server_list { + struct rcu_head rcu; +- afs_volid_t vids[AFS_MAXTYPES]; /* Volume IDs */ + refcount_t usage; ++ bool attached; /* T if attached to servers */ + unsigned char nr_servers; + unsigned char preferred; /* Preferred server */ + unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */ +@@ -567,10 +571,9 @@ struct afs_server_list { + * Live AFS volume management. + */ + struct afs_volume { +- union { +- struct rcu_head rcu; +- afs_volid_t vid; /* volume ID */ +- }; ++ struct rcu_head rcu; ++ afs_volid_t vid; /* The volume ID of this volume */ ++ afs_volid_t vids[AFS_MAXTYPES]; /* All associated volume IDs */ + refcount_t ref; + time64_t update_at; /* Time at which to next update */ + struct afs_cell *cell; /* Cell to which belongs (pins ref) */ +@@ -1450,10 +1453,14 @@ static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list + } + + extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *); +-extern struct afs_server_list *afs_alloc_server_list(struct afs_cell *, struct key *, +- struct afs_vldb_entry *, +- u8); ++struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume, ++ struct key *key, ++ struct afs_vldb_entry *vldb); + extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *); ++void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist); ++void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist, ++ struct afs_server_list *old); ++void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server_list *slist); + + /* + * super.c +diff --git a/fs/afs/server.c b/fs/afs/server.c 
+index 0bd2f5ba6900c..87381c2ffe374 100644 +--- a/fs/afs/server.c ++++ b/fs/afs/server.c +@@ -236,6 +236,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, + server->addr_version = alist->version; + server->uuid = *uuid; + rwlock_init(&server->fs_lock); ++ INIT_LIST_HEAD(&server->volumes); + INIT_WORK(&server->initcb_work, afs_server_init_callback_work); + init_waitqueue_head(&server->probe_wq); + INIT_LIST_HEAD(&server->probe_link); +diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c +index b59896b1de0af..4d6369477f54e 100644 +--- a/fs/afs/server_list.c ++++ b/fs/afs/server_list.c +@@ -24,13 +24,13 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist) + /* + * Build a server list from a VLDB record. + */ +-struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, ++struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume, + struct key *key, +- struct afs_vldb_entry *vldb, +- u8 type_mask) ++ struct afs_vldb_entry *vldb) + { + struct afs_server_list *slist; + struct afs_server *server; ++ unsigned int type_mask = 1 << volume->type; + int ret = -ENOMEM, nr_servers = 0, i, j; + + for (i = 0; i < vldb->nr_servers; i++) +@@ -44,15 +44,12 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, + refcount_set(&slist->usage, 1); + rwlock_init(&slist->lock); + +- for (i = 0; i < AFS_MAXTYPES; i++) +- slist->vids[i] = vldb->vid[i]; +- + /* Make sure a records exists for each server in the list. 
*/ + for (i = 0; i < vldb->nr_servers; i++) { + if (!(vldb->fs_mask[i] & type_mask)) + continue; + +- server = afs_lookup_server(cell, key, &vldb->fs_server[i], ++ server = afs_lookup_server(volume->cell, key, &vldb->fs_server[i], + vldb->addr_version[i]); + if (IS_ERR(server)) { + ret = PTR_ERR(server); +@@ -70,7 +67,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, + break; + if (j < slist->nr_servers) { + if (slist->servers[j].server == server) { +- afs_put_server(cell->net, server, ++ afs_put_server(volume->cell->net, server, + afs_server_trace_put_slist_isort); + continue; + } +@@ -81,6 +78,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, + } + + slist->servers[j].server = server; ++ slist->servers[j].volume = volume; + slist->nr_servers++; + } + +@@ -92,7 +90,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, + return slist; + + error_2: +- afs_put_serverlist(cell->net, slist); ++ afs_put_serverlist(volume->cell->net, slist); + error: + return ERR_PTR(ret); + } +@@ -127,3 +125,99 @@ bool afs_annotate_server_list(struct afs_server_list *new, + + return true; + } ++ ++/* ++ * Attach a volume to the servers it is going to use. ++ */ ++void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist) ++{ ++ struct afs_server_entry *se, *pe; ++ struct afs_server *server; ++ struct list_head *p; ++ unsigned int i; ++ ++ spin_lock(&volume->cell->vs_lock); ++ ++ for (i = 0; i < slist->nr_servers; i++) { ++ se = &slist->servers[i]; ++ server = se->server; ++ ++ list_for_each(p, &server->volumes) { ++ pe = list_entry(p, struct afs_server_entry, slink); ++ if (volume->vid <= pe->volume->vid) ++ break; ++ } ++ list_add_tail_rcu(&se->slink, p); ++ } ++ ++ slist->attached = true; ++ spin_unlock(&volume->cell->vs_lock); ++} ++ ++/* ++ * Reattach a volume to the servers it is going to use when server list is ++ * replaced. 
We try to switch the attachment points to avoid rewalking the ++ * lists. ++ */ ++void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *new, ++ struct afs_server_list *old) ++{ ++ unsigned int n = 0, o = 0; ++ ++ spin_lock(&volume->cell->vs_lock); ++ ++ while (n < new->nr_servers || o < old->nr_servers) { ++ struct afs_server_entry *pn = n < new->nr_servers ? &new->servers[n] : NULL; ++ struct afs_server_entry *po = o < old->nr_servers ? &old->servers[o] : NULL; ++ struct afs_server_entry *s; ++ struct list_head *p; ++ int diff; ++ ++ if (pn && po && pn->server == po->server) { ++ list_replace_rcu(&po->slink, &pn->slink); ++ n++; ++ o++; ++ continue; ++ } ++ ++ if (pn && po) ++ diff = memcmp(&pn->server->uuid, &po->server->uuid, ++ sizeof(pn->server->uuid)); ++ else ++ diff = pn ? -1 : 1; ++ ++ if (diff < 0) { ++ list_for_each(p, &pn->server->volumes) { ++ s = list_entry(p, struct afs_server_entry, slink); ++ if (volume->vid <= s->volume->vid) ++ break; ++ } ++ list_add_tail_rcu(&pn->slink, p); ++ n++; ++ } else { ++ list_del_rcu(&po->slink); ++ o++; ++ } ++ } ++ ++ spin_unlock(&volume->cell->vs_lock); ++} ++ ++/* ++ * Detach a volume from the servers it has been using. 
++ */ ++void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server_list *slist) ++{ ++ unsigned int i; ++ ++ if (!slist->attached) ++ return; ++ ++ spin_lock(&volume->cell->vs_lock); ++ ++ for (i = 0; i < slist->nr_servers; i++) ++ list_del_rcu(&slist->servers[i].slink); ++ ++ slist->attached = false; ++ spin_unlock(&volume->cell->vs_lock); ++} +diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c +index 83cf1bfbe343a..b2cc10df95308 100644 +--- a/fs/afs/vl_alias.c ++++ b/fs/afs/vl_alias.c +@@ -126,7 +126,7 @@ static int afs_compare_volume_slists(const struct afs_volume *vol_a, + lb = rcu_dereference(vol_b->servers); + + for (i = 0; i < AFS_MAXTYPES; i++) +- if (la->vids[i] != lb->vids[i]) ++ if (vol_a->vids[i] != vol_b->vids[i]) + return 0; + + while (a < la->nr_servers && b < lb->nr_servers) { +diff --git a/fs/afs/volume.c b/fs/afs/volume.c +index c028598a903c9..0f64b97581272 100644 +--- a/fs/afs/volume.c ++++ b/fs/afs/volume.c +@@ -72,11 +72,11 @@ static void afs_remove_volume_from_cell(struct afs_volume *volume) + */ + static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, + struct afs_vldb_entry *vldb, +- unsigned long type_mask) ++ struct afs_server_list **_slist) + { + struct afs_server_list *slist; + struct afs_volume *volume; +- int ret = -ENOMEM; ++ int ret = -ENOMEM, i; + + volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); + if (!volume) +@@ -95,13 +95,16 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, + rwlock_init(&volume->cb_v_break_lock); + memcpy(volume->name, vldb->name, vldb->name_len + 1); + +- slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); ++ for (i = 0; i < AFS_MAXTYPES; i++) ++ volume->vids[i] = vldb->vid[i]; ++ ++ slist = afs_alloc_server_list(volume, params->key, vldb); + if (IS_ERR(slist)) { + ret = PTR_ERR(slist); + goto error_1; + } + +- refcount_set(&slist->usage, 1); ++ *_slist = slist; + rcu_assign_pointer(volume->servers, slist); + 
trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc); + return volume; +@@ -117,17 +120,19 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, + * Look up or allocate a volume record. + */ + static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params, +- struct afs_vldb_entry *vldb, +- unsigned long type_mask) ++ struct afs_vldb_entry *vldb) + { ++ struct afs_server_list *slist; + struct afs_volume *candidate, *volume; + +- candidate = afs_alloc_volume(params, vldb, type_mask); ++ candidate = afs_alloc_volume(params, vldb, &slist); + if (IS_ERR(candidate)) + return candidate; + + volume = afs_insert_volume_into_cell(params->cell, candidate); +- if (volume != candidate) ++ if (volume == candidate) ++ afs_attach_volume_to_servers(volume, slist); ++ else + afs_put_volume(params->net, candidate, afs_volume_trace_put_cell_dup); + return volume; + } +@@ -208,8 +213,7 @@ struct afs_volume *afs_create_volume(struct afs_fs_context *params) + goto error; + } + +- type_mask = 1UL << params->type; +- volume = afs_lookup_volume(params, vldb, type_mask); ++ volume = afs_lookup_volume(params, vldb); + + error: + kfree(vldb); +@@ -221,14 +225,17 @@ struct afs_volume *afs_create_volume(struct afs_fs_context *params) + */ + static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume) + { ++ struct afs_server_list *slist = rcu_access_pointer(volume->servers); ++ + _enter("%p", volume); + + #ifdef CONFIG_AFS_FSCACHE + ASSERTCMP(volume->cache, ==, NULL); + #endif + ++ afs_detach_volume_from_servers(volume, slist); + afs_remove_volume_from_cell(volume); +- afs_put_serverlist(net, rcu_access_pointer(volume->servers)); ++ afs_put_serverlist(net, slist); + afs_put_cell(volume->cell, afs_cell_trace_put_vol); + trace_afs_volume(volume->vid, refcount_read(&volume->ref), + afs_volume_trace_free); +@@ -362,8 +369,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) + } + + /* See if the volume's server 
list got updated. */ +- new = afs_alloc_server_list(volume->cell, key, +- vldb, (1 << volume->type)); ++ new = afs_alloc_server_list(volume, key, vldb); + if (IS_ERR(new)) { + ret = PTR_ERR(new); + goto error_vldb; +@@ -384,9 +390,11 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) + + volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; + write_unlock(&volume->servers_lock); +- ret = 0; + ++ if (discard == old) ++ afs_reattach_volume_to_servers(volume, new, old); + afs_put_serverlist(volume->cell->net, discard); ++ ret = 0; + error_vldb: + kfree(vldb); + error: +-- +2.39.5 + diff --git a/queue-6.1/afs-remove-variable-nr_servers.patch b/queue-6.1/afs-remove-variable-nr_servers.patch new file mode 100644 index 0000000000..7a8b2eb572 --- /dev/null +++ b/queue-6.1/afs-remove-variable-nr_servers.patch @@ -0,0 +1,44 @@ +From abd023b40e52ce60463652114238de75a324d04f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Oct 2022 18:39:23 +0100 +Subject: afs: remove variable nr_servers + +From: Colin Ian King + +[ Upstream commit 318b83b71242998814a570c3420c042ee6165fca ] + +Variable nr_servers is no longer being used, the last reference +to it was removed in commit 45df8462730d ("afs: Fix server list handling") +so clean up the code by removing it. 
+ +Signed-off-by: Colin Ian King +Signed-off-by: David Howells +cc: Marc Dionne +cc: linux-afs@lists.infradead.org +Link: https://lore.kernel.org/r/20221020173923.21342-1-colin.i.king@gmail.com/ +Stable-dep-of: add117e48df4 ("afs: Fix the server_list to unuse a displaced server rather than putting it") +Signed-off-by: Sasha Levin +--- + fs/afs/volume.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/fs/afs/volume.c b/fs/afs/volume.c +index a146d70efa650..c028598a903c9 100644 +--- a/fs/afs/volume.c ++++ b/fs/afs/volume.c +@@ -76,11 +76,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, + { + struct afs_server_list *slist; + struct afs_volume *volume; +- int ret = -ENOMEM, nr_servers = 0, i; +- +- for (i = 0; i < vldb->nr_servers; i++) +- if (vldb->fs_mask[i] & type_mask) +- nr_servers++; ++ int ret = -ENOMEM; + + volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); + if (!volume) +-- +2.39.5 + diff --git a/queue-6.1/alsa-usb-audio-avoid-dropping-midi-events-at-closing.patch b/queue-6.1/alsa-usb-audio-avoid-dropping-midi-events-at-closing.patch new file mode 100644 index 0000000000..949be5a56e --- /dev/null +++ b/queue-6.1/alsa-usb-audio-avoid-dropping-midi-events-at-closing.patch @@ -0,0 +1,47 @@ +From 868e655e0ba14e36ff87267557a61b710e48a6dd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Feb 2025 12:40:24 +0100 +Subject: ALSA: usb-audio: Avoid dropping MIDI events at closing multiple ports + +From: Takashi Iwai + +[ Upstream commit a3bdd8f5c2217e1cb35db02c2eed36ea20fb50f5 ] + +We fixed the UAF issue in USB MIDI code by canceling the pending work +at closing each MIDI output device in the commit below. However, this +assumed that it's the only one that is tied with the endpoint, and it +resulted in unexpected data truncations when multiple devices are +assigned to a single endpoint and opened simultaneously. 
+ +For addressing the unexpected MIDI message drops, simply replace +cancel_work_sync() with flush_work(). The drain callback should have +been already invoked before the close callback, hence the port->active +flag must be already cleared. So this just assures that the pending +work is finished before freeing the resources. + +Fixes: 0125de38122f ("ALSA: usb-audio: Cancel pending work at closing a MIDI substream") +Reported-and-tested-by: John Keeping +Closes: https://lore.kernel.org/20250217111647.3368132-1-jkeeping@inmusicbrands.com +Link: https://patch.msgid.link/20250218114024.23125-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/usb/midi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sound/usb/midi.c b/sound/usb/midi.c +index 2839f6b6f09b4..eed71369c7af2 100644 +--- a/sound/usb/midi.c ++++ b/sound/usb/midi.c +@@ -1145,7 +1145,7 @@ static int snd_usbmidi_output_close(struct snd_rawmidi_substream *substream) + { + struct usbmidi_out_port *port = substream->runtime->private_data; + +- cancel_work_sync(&port->ep->work); ++ flush_work(&port->ep->work); + return substream_open(substream, 0, 0); + } + +-- +2.39.5 + diff --git a/queue-6.1/asoc-es8328-fix-route-from-dac-to-output.patch b/queue-6.1/asoc-es8328-fix-route-from-dac-to-output.patch new file mode 100644 index 0000000000..fc71db60d3 --- /dev/null +++ b/queue-6.1/asoc-es8328-fix-route-from-dac-to-output.patch @@ -0,0 +1,110 @@ +From 7324798703ecd45fed10a1ed326cc27dd6fa8c62 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 22 Feb 2025 20:39:57 +0100 +Subject: ASoC: es8328: fix route from DAC to output + +From: Nicolas Frattaroli + +[ Upstream commit 5b0c02f9b8acf2a791e531bbc09acae2d51f4f9b ] + +The ES8328 codec driver, which is also used for the ES8388 chip that +appears to have an identical register map, claims that the output can +either take the route from DAC->Mixer->Output or through DAC->Output +directly. 
To the best of what I could find, this is not true, and +creates problems. + +Without DACCONTROL17 bit index 7 set for the left channel, as well as +DACCONTROL20 bit index 7 set for the right channel, I cannot get any +analog audio out on Left Out 2 and Right Out 2 respectively, despite the +DAPM routes claiming that this should be possible. Furthermore, the same +is the case for Left Out 1 and Right Out 1, showing that those two don't +have a direct route from DAC to output bypassing the mixer either. + +Those control bits toggle whether the DACs are fed (stale bread?) into +their respective mixers. If one "unmutes" the mixer controls in +alsamixer, then sure, the audio output works, but if it doesn't work +without the mixer being fed the DAC input then evidently it's not a +direct output from the DAC. + +ES8328/ES8388 are seemingly not alone in this. ES8323, which uses a +separate driver for what appears to be a very similar register map, +simply flips those two bits on in its probe function, and then pretends +there is no power management whatsoever for the individual controls. +Fair enough. + +My theory as to why nobody has noticed this up to this point is that +everyone just assumes it's their fault when they had to unmute an +additional control in ALSA. + +Fix this in the es8328 driver by removing the erroneous direct route, +then get rid of the playback switch controls and have those bits tied to +the mixer's widget instead, which until now had no register to play +with. 
+ +Fixes: 567e4f98922c ("ASoC: add es8328 codec driver") +Signed-off-by: Nicolas Frattaroli +Link: https://patch.msgid.link/20250222-es8328-route-bludgeoning-v1-1-99bfb7fb22d9@collabora.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/codecs/es8328.c | 15 ++++----------- + 1 file changed, 4 insertions(+), 11 deletions(-) + +diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c +index 160adc706cc69..8182e9b37c03d 100644 +--- a/sound/soc/codecs/es8328.c ++++ b/sound/soc/codecs/es8328.c +@@ -234,7 +234,6 @@ static const struct snd_kcontrol_new es8328_right_line_controls = + + /* Left Mixer */ + static const struct snd_kcontrol_new es8328_left_mixer_controls[] = { +- SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL17, 7, 1, 0), + SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL17, 6, 1, 0), + SOC_DAPM_SINGLE("Right Playback Switch", ES8328_DACCONTROL18, 7, 1, 0), + SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL18, 6, 1, 0), +@@ -244,7 +243,6 @@ static const struct snd_kcontrol_new es8328_left_mixer_controls[] = { + static const struct snd_kcontrol_new es8328_right_mixer_controls[] = { + SOC_DAPM_SINGLE("Left Playback Switch", ES8328_DACCONTROL19, 7, 1, 0), + SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL19, 6, 1, 0), +- SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL20, 7, 1, 0), + SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL20, 6, 1, 0), + }; + +@@ -337,10 +335,10 @@ static const struct snd_soc_dapm_widget es8328_dapm_widgets[] = { + SND_SOC_DAPM_DAC("Left DAC", "Left Playback", ES8328_DACPOWER, + ES8328_DACPOWER_LDAC_OFF, 1), + +- SND_SOC_DAPM_MIXER("Left Mixer", SND_SOC_NOPM, 0, 0, ++ SND_SOC_DAPM_MIXER("Left Mixer", ES8328_DACCONTROL17, 7, 0, + &es8328_left_mixer_controls[0], + ARRAY_SIZE(es8328_left_mixer_controls)), +- SND_SOC_DAPM_MIXER("Right Mixer", SND_SOC_NOPM, 0, 0, ++ SND_SOC_DAPM_MIXER("Right Mixer", ES8328_DACCONTROL20, 7, 0, + &es8328_right_mixer_controls[0], + 
ARRAY_SIZE(es8328_right_mixer_controls)), + +@@ -419,19 +417,14 @@ static const struct snd_soc_dapm_route es8328_dapm_routes[] = { + { "Right Line Mux", "PGA", "Right PGA Mux" }, + { "Right Line Mux", "Differential", "Differential Mux" }, + +- { "Left Out 1", NULL, "Left DAC" }, +- { "Right Out 1", NULL, "Right DAC" }, +- { "Left Out 2", NULL, "Left DAC" }, +- { "Right Out 2", NULL, "Right DAC" }, +- +- { "Left Mixer", "Playback Switch", "Left DAC" }, ++ { "Left Mixer", NULL, "Left DAC" }, + { "Left Mixer", "Left Bypass Switch", "Left Line Mux" }, + { "Left Mixer", "Right Playback Switch", "Right DAC" }, + { "Left Mixer", "Right Bypass Switch", "Right Line Mux" }, + + { "Right Mixer", "Left Playback Switch", "Left DAC" }, + { "Right Mixer", "Left Bypass Switch", "Left Line Mux" }, +- { "Right Mixer", "Playback Switch", "Right DAC" }, ++ { "Right Mixer", NULL, "Right DAC" }, + { "Right Mixer", "Right Bypass Switch", "Right Line Mux" }, + + { "DAC DIG", NULL, "DAC STM" }, +-- +2.39.5 + diff --git a/queue-6.1/bluetooth-l2cap-fix-l2cap_ecred_conn_rsp-response.patch b/queue-6.1/bluetooth-l2cap-fix-l2cap_ecred_conn_rsp-response.patch new file mode 100644 index 0000000000..cfa8adcb84 --- /dev/null +++ b/queue-6.1/bluetooth-l2cap-fix-l2cap_ecred_conn_rsp-response.patch @@ -0,0 +1,99 @@ +From 719c2d3d6138e88d85b45cbb6a1000ba448e6078 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Feb 2025 10:30:25 -0500 +Subject: Bluetooth: L2CAP: Fix L2CAP_ECRED_CONN_RSP response + +From: Luiz Augusto von Dentz + +[ Upstream commit b25120e1d5f2ebb3db00af557709041f47f7f3d0 ] + +L2CAP_ECRED_CONN_RSP needs to respond DCID in the same order received as +SCID but the order is reversed due to use of list_add which actually +prepend channels to the list so the response is reversed: + +> ACL Data RX: Handle 16 flags 0x02 dlen 26 + LE L2CAP: Enhanced Credit Connection Request (0x17) ident 2 len 18 + PSM: 39 (0x0027) + MTU: 256 + MPS: 251 + Credits: 65535 + Source CID: 116 + Source CID: 
117 + Source CID: 118 + Source CID: 119 + Source CID: 120 +< ACL Data TX: Handle 16 flags 0x00 dlen 26 + LE L2CAP: Enhanced Credit Connection Response (0x18) ident 2 len 18 + MTU: 517 + MPS: 247 + Credits: 3 + Result: Connection successful (0x0000) + Destination CID: 68 + Destination CID: 67 + Destination CID: 66 + Destination CID: 65 + Destination CID: 64 + +Also make sure the response doesn't include channels that are not on +BT_CONNECT2 since the chan->ident can be set to the same value as in the +following trace: + +< ACL Data TX: Handle 16 flags 0x00 dlen 12 + LE L2CAP: LE Flow Control Credit (0x16) ident 6 len 4 + Source CID: 64 + Credits: 1 +... +> ACL Data RX: Handle 16 flags 0x02 dlen 18 + LE L2CAP: Enhanced Credit Connection Request (0x17) ident 6 len 10 + PSM: 39 (0x0027) + MTU: 517 + MPS: 251 + Credits: 255 + Source CID: 70 +< ACL Data TX: Handle 16 flags 0x00 dlen 20 + LE L2CAP: Enhanced Credit Connection Response (0x18) ident 6 len 12 + MTU: 517 + MPS: 247 + Credits: 3 + Result: Connection successful (0x0000) + Destination CID: 64 + Destination CID: 68 + +Closes: https://github.com/bluez/bluez/issues/1094 +Fixes: 9aa9d9473f15 ("Bluetooth: L2CAP: Fix responding with wrong PDU type") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/l2cap_core.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index 2a8051fae08c7..36d6122f2e12d 100644 +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -656,7 +656,8 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) + test_bit(FLAG_HOLD_HCI_CONN, &chan->flags)) + hci_conn_hold(conn->hcon); + +- list_add(&chan->list, &conn->chan_l); ++ /* Append to the list since the order matters for ECRED */ ++ list_add_tail(&chan->list, &conn->chan_l); + } + + void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) +@@ -3995,7 +3996,11 @@ static void 
l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data) + { + struct l2cap_ecred_rsp_data *rsp = data; + +- if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) ++ /* Check if channel for outgoing connection or if it wasn't deferred ++ * since in those cases it must be skipped. ++ */ ++ if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags) || ++ !test_and_clear_bit(FLAG_DEFER_SETUP, &chan->flags)) + return; + + /* Reset ident so only one response is sent */ +-- +2.39.5 + diff --git a/queue-6.1/include-net-add-static-inline-dst_dev_overhead-to-ds.patch b/queue-6.1/include-net-add-static-inline-dst_dev_overhead-to-ds.patch new file mode 100644 index 0000000000..14b739d8a5 --- /dev/null +++ b/queue-6.1/include-net-add-static-inline-dst_dev_overhead-to-ds.patch @@ -0,0 +1,49 @@ +From 2a95bdb541af9b0db397f9fd9c6c45bd621b0c44 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Dec 2024 13:49:42 +0100 +Subject: include: net: add static inline dst_dev_overhead() to dst.h + +From: Justin Iurman + +[ Upstream commit 0600cf40e9b36fe17f9c9f04d4f9cef249eaa5e7 ] + +Add static inline dst_dev_overhead() function to include/net/dst.h. This +helper function is used by ioam6_iptunnel, rpl_iptunnel and +seg6_iptunnel to get the dev's overhead based on a cache entry +(dst_entry). If the cache is empty, the default and generic value +skb->mac_len is returned. Otherwise, LL_RESERVED_SPACE() over dst's dev +is returned. 
+ +Signed-off-by: Justin Iurman +Cc: Alexander Lobakin +Cc: Vadim Fedorenko +Signed-off-by: Paolo Abeni +Stable-dep-of: c64a0727f9b1 ("net: ipv6: fix dst ref loop on input in seg6 lwt") +Signed-off-by: Sasha Levin +--- + include/net/dst.h | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/include/net/dst.h b/include/net/dst.h +index d67fda89cd0fa..3a1a6f94a8092 100644 +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -434,6 +434,15 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout) + dst->expires = expires; + } + ++static inline unsigned int dst_dev_overhead(struct dst_entry *dst, ++ struct sk_buff *skb) ++{ ++ if (likely(dst)) ++ return LL_RESERVED_SPACE(dst->dev); ++ ++ return skb->mac_len; ++} ++ + INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *, + struct sk_buff *)); + INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *, +-- +2.39.5 + diff --git a/queue-6.1/ipv4-convert-icmp_route_lookup-to-dscp_t.patch b/queue-6.1/ipv4-convert-icmp_route_lookup-to-dscp_t.patch new file mode 100644 index 0000000000..f86edfcd59 --- /dev/null +++ b/queue-6.1/ipv4-convert-icmp_route_lookup-to-dscp_t.patch @@ -0,0 +1,81 @@ +From 58dc84dfc7fe7af13d34987cc888d07ad85f3155 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Oct 2024 21:28:37 +0200 +Subject: ipv4: Convert icmp_route_lookup() to dscp_t. + +From: Guillaume Nault + +[ Upstream commit 913c83a610bb7dd8e5952a2b4663e1feec0b5de6 ] + +Pass a dscp_t variable to icmp_route_lookup(), instead of a plain u8, +to prevent accidental setting of ECN bits in ->flowi4_tos. Rename that +variable ("tos" -> "dscp") to make the intent clear. + +While there, reorganise the function parameters to fill up horizontal +space. 
+ +Signed-off-by: Guillaume Nault +Reviewed-by: David Ahern +Link: https://patch.msgid.link/294fead85c6035bcdc5fcf9a6bb4ce8798c45ba1.1727807926.git.gnault@redhat.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part") +Signed-off-by: Sasha Levin +--- + net/ipv4/icmp.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c +index a154339845dd4..855fcef829e2c 100644 +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -484,13 +484,11 @@ static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb) + return route_lookup_dev; + } + +-static struct rtable *icmp_route_lookup(struct net *net, +- struct flowi4 *fl4, ++static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, + struct sk_buff *skb_in, +- const struct iphdr *iph, +- __be32 saddr, u8 tos, u32 mark, +- int type, int code, +- struct icmp_bxm *param) ++ const struct iphdr *iph, __be32 saddr, ++ dscp_t dscp, u32 mark, int type, ++ int code, struct icmp_bxm *param) + { + struct net_device *route_lookup_dev; + struct rtable *rt, *rt2; +@@ -503,7 +501,7 @@ static struct rtable *icmp_route_lookup(struct net *net, + fl4->saddr = saddr; + fl4->flowi4_mark = mark; + fl4->flowi4_uid = sock_net_uid(net, NULL); +- fl4->flowi4_tos = tos & INET_DSCP_MASK; ++ fl4->flowi4_tos = inet_dscp_to_dsfield(dscp); + fl4->flowi4_proto = IPPROTO_ICMP; + fl4->fl4_icmp_type = type; + fl4->fl4_icmp_code = code; +@@ -551,7 +549,7 @@ static struct rtable *icmp_route_lookup(struct net *net, + orefdst = skb_in->_skb_refdst; /* save old refdst */ + skb_dst_set(skb_in, NULL); + err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr, +- tos, rt2->dst.dev); ++ inet_dscp_to_dsfield(dscp), rt2->dst.dev); + + dst_release(&rt2->dst); + rt2 = skb_rtable(skb_in); +@@ -747,8 +745,9 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + ipc.opt = 
&icmp_param.replyopts.opt; + ipc.sockc.mark = mark; + +- rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, +- type, code, &icmp_param); ++ rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, ++ inet_dsfield_to_dscp(tos), mark, type, code, ++ &icmp_param); + if (IS_ERR(rt)) + goto out_unlock; + +-- +2.39.5 + diff --git a/queue-6.1/ipv4-convert-ip_route_input-to-dscp_t.patch b/queue-6.1/ipv4-convert-ip_route_input-to-dscp_t.patch new file mode 100644 index 0000000000..f811a9c659 --- /dev/null +++ b/queue-6.1/ipv4-convert-ip_route_input-to-dscp_t.patch @@ -0,0 +1,156 @@ +From 61f5ec7e5f8d03c48b3895ab18c882105110647d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Oct 2024 21:28:43 +0200 +Subject: ipv4: Convert ip_route_input() to dscp_t. + +From: Guillaume Nault + +[ Upstream commit 7e863e5db6185b1add0df4cb01b31a4ed1c4b738 ] + +Pass a dscp_t variable to ip_route_input(), instead of a plain u8, to +prevent accidental setting of ECN bits in ->flowi4_tos. + +Callers of ip_route_input() to consider are: + + * input_action_end_dx4_finish() and input_action_end_dt4() in + net/ipv6/seg6_local.c. These functions set the tos parameter to 0, + which is already a valid dscp_t value, so they don't need to be + adjusted for the new prototype. + + * icmp_route_lookup(), which already has a dscp_t variable to pass as + parameter. We just need to remove the inet_dscp_to_dsfield() + conversion. + + * br_nf_pre_routing_finish(), ip_options_rcv_srr() and ip4ip6_err(), + which get the DSCP directly from IPv4 headers. Define a helper to + read the .tos field of struct iphdr as dscp_t, so that these + function don't have to do the conversion manually. + +While there, declare *iph as const in br_nf_pre_routing_finish(), +declare its local variables in reverse-christmas-tree order and move +the "err = ip_route_input()" assignment out of the conditional to avoid +checkpatch warning. 
+ +Signed-off-by: Guillaume Nault +Reviewed-by: David Ahern +Link: https://patch.msgid.link/e9d40781d64d3d69f4c79ac8a008b8d67a033e8d.1727807926.git.gnault@redhat.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part") +Signed-off-by: Sasha Levin +--- + include/net/ip.h | 5 +++++ + include/net/route.h | 5 +++-- + net/bridge/br_netfilter_hooks.c | 8 +++++--- + net/ipv4/icmp.c | 2 +- + net/ipv4/ip_options.c | 3 ++- + net/ipv6/ip6_tunnel.c | 4 ++-- + 6 files changed, 18 insertions(+), 9 deletions(-) + +diff --git a/include/net/ip.h b/include/net/ip.h +index 9d754c4a53002..4ee23eb0814a3 100644 +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -409,6 +409,11 @@ int ip_decrease_ttl(struct iphdr *iph) + return --iph->ttl; + } + ++static inline dscp_t ip4h_dscp(const struct iphdr *ip4h) ++{ ++ return inet_dsfield_to_dscp(ip4h->tos); ++} ++ + static inline int ip_mtu_locked(const struct dst_entry *dst) + { + const struct rtable *rt = (const struct rtable *)dst; +diff --git a/include/net/route.h b/include/net/route.h +index f396176022377..4185e6da9ef85 100644 +--- a/include/net/route.h ++++ b/include/net/route.h +@@ -203,12 +203,13 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src, + const struct sk_buff *hint); + + static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, +- u8 tos, struct net_device *devin) ++ dscp_t dscp, struct net_device *devin) + { + int err; + + rcu_read_lock(); +- err = ip_route_input_noref(skb, dst, src, tos, devin); ++ err = ip_route_input_noref(skb, dst, src, inet_dscp_to_dsfield(dscp), ++ devin); + if (!err) { + skb_dst_force(skb); + if (!skb_dst(skb)) +diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c +index 5c6ed1d49b92c..b4d661fe7886d 100644 +--- a/net/bridge/br_netfilter_hooks.c ++++ b/net/bridge/br_netfilter_hooks.c +@@ -366,9 +366,9 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, + */ + static int 
br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) + { +- struct net_device *dev = skb->dev, *br_indev; +- struct iphdr *iph = ip_hdr(skb); + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); ++ struct net_device *dev = skb->dev, *br_indev; ++ const struct iphdr *iph = ip_hdr(skb); + struct rtable *rt; + int err; + +@@ -386,7 +386,9 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ + } + nf_bridge->in_prerouting = 0; + if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { +- if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { ++ err = ip_route_input(skb, iph->daddr, iph->saddr, ++ ip4h_dscp(iph), dev); ++ if (err) { + struct in_device *in_dev = __in_dev_get_rcu(dev); + + /* If err equals -EHOSTUNREACH the error is due to a +diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c +index 855fcef829e2c..94501bb30c431 100644 +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -549,7 +549,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, + orefdst = skb_in->_skb_refdst; /* save old refdst */ + skb_dst_set(skb_in, NULL); + err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr, +- inet_dscp_to_dsfield(dscp), rt2->dst.dev); ++ dscp, rt2->dst.dev); + + dst_release(&rt2->dst); + rt2 = skb_rtable(skb_in); +diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c +index a9e22a098872f..b4c59708fc095 100644 +--- a/net/ipv4/ip_options.c ++++ b/net/ipv4/ip_options.c +@@ -617,7 +617,8 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev) + + orefdst = skb->_skb_refdst; + skb_dst_set(skb, NULL); +- err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev); ++ err = ip_route_input(skb, nexthop, iph->saddr, ip4h_dscp(iph), ++ dev); + rt2 = skb_rtable(skb); + if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { + skb_dst_drop(skb); +diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c +index f3324f2a40466..a82d382193e41 
100644 +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -628,8 +628,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + } + skb_dst_set(skb2, &rt->dst); + } else { +- if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, +- skb2->dev) || ++ if (ip_route_input(skb2, eiph->daddr, eiph->saddr, ++ ip4h_dscp(eiph), skb2->dev) || + skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) + goto out; + } +-- +2.39.5 + diff --git a/queue-6.1/ipv4-icmp-pass-full-ds-field-to-ip_route_input.patch b/queue-6.1/ipv4-icmp-pass-full-ds-field-to-ip_route_input.patch new file mode 100644 index 0000000000..9d8ea8e533 --- /dev/null +++ b/queue-6.1/ipv4-icmp-pass-full-ds-field-to-ip_route_input.patch @@ -0,0 +1,44 @@ +From 94c1799e9dda9365958ade9cf0bac70aaa0d1eb3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Aug 2024 15:52:49 +0300 +Subject: ipv4: icmp: Pass full DS field to ip_route_input() + +From: Ido Schimmel + +[ Upstream commit 1c6f50b37f711b831d78973dad0df1da99ad0014 ] + +Align the ICMP code to other callers of ip_route_input() and pass the +full DS field. In the future this will allow us to perform a route +lookup according to the full DSCP value. + +No functional changes intended since the upper DSCP bits are masked when +comparing against the TOS selectors in FIB rules and routes. 
+ +Signed-off-by: Ido Schimmel +Reviewed-by: Guillaume Nault +Acked-by: Florian Westphal +Reviewed-by: David Ahern +Link: https://patch.msgid.link/20240821125251.1571445-11-idosch@nvidia.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part") +Signed-off-by: Sasha Levin +--- + net/ipv4/icmp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c +index b05fa424ad5ce..3807a269e0755 100644 +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -550,7 +550,7 @@ static struct rtable *icmp_route_lookup(struct net *net, + orefdst = skb_in->_skb_refdst; /* save old refdst */ + skb_dst_set(skb_in, NULL); + err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr, +- RT_TOS(tos), rt2->dst.dev); ++ tos, rt2->dst.dev); + + dst_release(&rt2->dst); + rt2 = skb_rtable(skb_in); +-- +2.39.5 + diff --git a/queue-6.1/ipv4-icmp-unmask-upper-dscp-bits-in-icmp_route_looku.patch b/queue-6.1/ipv4-icmp-unmask-upper-dscp-bits-in-icmp_route_looku.patch new file mode 100644 index 0000000000..3122aab017 --- /dev/null +++ b/queue-6.1/ipv4-icmp-unmask-upper-dscp-bits-in-icmp_route_looku.patch @@ -0,0 +1,51 @@ +From 82a825362282a873d74575a1437d17cc84c26a6c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Aug 2024 09:54:50 +0300 +Subject: ipv4: icmp: Unmask upper DSCP bits in icmp_route_lookup() + +From: Ido Schimmel + +[ Upstream commit 4805646c42e51d2fbf142864d281473ad453ad5d ] + +The function is called to resolve a route for an ICMP message that is +sent in response to a situation. Based on the type of the generated ICMP +message, the function is either passed the DS field of the packet that +generated the ICMP message or a DS field that is derived from it. + +Unmask the upper DSCP bits before resolving an output route via +ip_route_output_key_hash() so that in the future the lookup could be +performed according to the full DSCP value.
+ +Signed-off-by: Ido Schimmel +Reviewed-by: Guillaume Nault +Signed-off-by: David S. Miller +Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part") +Signed-off-by: Sasha Levin +--- + net/ipv4/icmp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c +index 3807a269e0755..a154339845dd4 100644 +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -93,6 +93,7 @@ + #include + #include + #include ++#include + #define CREATE_TRACE_POINTS + #include + +@@ -502,7 +503,7 @@ static struct rtable *icmp_route_lookup(struct net *net, + fl4->saddr = saddr; + fl4->flowi4_mark = mark; + fl4->flowi4_uid = sock_net_uid(net, NULL); +- fl4->flowi4_tos = RT_TOS(tos); ++ fl4->flowi4_tos = tos & INET_DSCP_MASK; + fl4->flowi4_proto = IPPROTO_ICMP; + fl4->fl4_icmp_type = type; + fl4->fl4_icmp_code = code; +-- +2.39.5 + diff --git a/queue-6.1/ipvlan-ensure-network-headers-are-in-skb-linear-part.patch b/queue-6.1/ipvlan-ensure-network-headers-are-in-skb-linear-part.patch new file mode 100644 index 0000000000..2cc9ea0ce7 --- /dev/null +++ b/queue-6.1/ipvlan-ensure-network-headers-are-in-skb-linear-part.patch @@ -0,0 +1,113 @@ +From f7e8fb6cc77cee5f5b699ac13a3bc405f41d41bd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Feb 2025 15:53:36 +0000 +Subject: ipvlan: ensure network headers are in skb linear part + +From: Eric Dumazet + +[ Upstream commit 27843ce6ba3d3122b65066550fe33fb8839f8aef ] + +syzbot found that ipvlan_process_v6_outbound() was assuming +the IPv6 network header is present in skb->head [1] + +Add the needed pskb_network_may_pull() calls for both +IPv4 and IPv6 handlers.
+ +[1] +BUG: KMSAN: uninit-value in __ipv6_addr_type+0xa2/0x490 net/ipv6/addrconf_core.c:47 + __ipv6_addr_type+0xa2/0x490 net/ipv6/addrconf_core.c:47 + ipv6_addr_type include/net/ipv6.h:555 [inline] + ip6_route_output_flags_noref net/ipv6/route.c:2616 [inline] + ip6_route_output_flags+0x51/0x720 net/ipv6/route.c:2651 + ip6_route_output include/net/ip6_route.h:93 [inline] + ipvlan_route_v6_outbound+0x24e/0x520 drivers/net/ipvlan/ipvlan_core.c:476 + ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:491 [inline] + ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:541 [inline] + ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:605 [inline] + ipvlan_queue_xmit+0xd72/0x1780 drivers/net/ipvlan/ipvlan_core.c:671 + ipvlan_start_xmit+0x5b/0x210 drivers/net/ipvlan/ipvlan_main.c:223 + __netdev_start_xmit include/linux/netdevice.h:5150 [inline] + netdev_start_xmit include/linux/netdevice.h:5159 [inline] + xmit_one net/core/dev.c:3735 [inline] + dev_hard_start_xmit+0x247/0xa20 net/core/dev.c:3751 + sch_direct_xmit+0x399/0xd40 net/sched/sch_generic.c:343 + qdisc_restart net/sched/sch_generic.c:408 [inline] + __qdisc_run+0x14da/0x35d0 net/sched/sch_generic.c:416 + qdisc_run+0x141/0x4d0 include/net/pkt_sched.h:127 + net_tx_action+0x78b/0x940 net/core/dev.c:5484 + handle_softirqs+0x1a0/0x7c0 kernel/softirq.c:561 + __do_softirq+0x14/0x1a kernel/softirq.c:595 + do_softirq+0x9a/0x100 kernel/softirq.c:462 + __local_bh_enable_ip+0x9f/0xb0 kernel/softirq.c:389 + local_bh_enable include/linux/bottom_half.h:33 [inline] + rcu_read_unlock_bh include/linux/rcupdate.h:919 [inline] + __dev_queue_xmit+0x2758/0x57d0 net/core/dev.c:4611 + dev_queue_xmit include/linux/netdevice.h:3311 [inline] + packet_xmit+0x9c/0x6c0 net/packet/af_packet.c:276 + packet_snd net/packet/af_packet.c:3132 [inline] + packet_sendmsg+0x93e0/0xa7e0 net/packet/af_packet.c:3164 + sock_sendmsg_nosec net/socket.c:718 [inline] + +Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.") 
+Reported-by: syzbot+93ab4a777bafb9d9f960@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/67b74f01.050a0220.14d86d.02d8.GAE@google.com/T/#u +Signed-off-by: Eric Dumazet +Cc: Mahesh Bandewar +Link: https://patch.msgid.link/20250220155336.61884-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ipvlan/ipvlan_core.c | 21 ++++++++++++++++----- + 1 file changed, 16 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c +index 38eb40cba5aac..eea81a7334052 100644 +--- a/drivers/net/ipvlan/ipvlan_core.c ++++ b/drivers/net/ipvlan/ipvlan_core.c +@@ -416,20 +416,25 @@ struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, + + static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) + { +- const struct iphdr *ip4h = ip_hdr(skb); + struct net_device *dev = skb->dev; + struct net *net = dev_net(dev); +- struct rtable *rt; + int err, ret = NET_XMIT_DROP; ++ const struct iphdr *ip4h; ++ struct rtable *rt; + struct flowi4 fl4 = { + .flowi4_oif = dev->ifindex, +- .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)), + .flowi4_flags = FLOWI_FLAG_ANYSRC, + .flowi4_mark = skb->mark, +- .daddr = ip4h->daddr, +- .saddr = ip4h->saddr, + }; + ++ if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) ++ goto err; ++ ++ ip4h = ip_hdr(skb); ++ fl4.daddr = ip4h->daddr; ++ fl4.saddr = ip4h->saddr; ++ fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)); ++ + rt = ip_route_output_flow(net, &fl4, NULL); + if (IS_ERR(rt)) + goto err; +@@ -488,6 +493,12 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) + struct net_device *dev = skb->dev; + int err, ret = NET_XMIT_DROP; + ++ if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) { ++ DEV_STATS_INC(dev, tx_errors); ++ kfree_skb(skb); ++ return ret; ++ } ++ + err = ipvlan_route_v6_outbound(dev, skb); + if (unlikely(err)) { + DEV_STATS_INC(dev, tx_errors); +-- +2.39.5 + 
diff --git a/queue-6.1/ipvlan-prepare-ipvlan_process_v4_outbound-to-future-.patch b/queue-6.1/ipvlan-prepare-ipvlan_process_v4_outbound-to-future-.patch new file mode 100644 index 0000000000..0967b7dd88 --- /dev/null +++ b/queue-6.1/ipvlan-prepare-ipvlan_process_v4_outbound-to-future-.patch @@ -0,0 +1,50 @@ +From 094e8682d0f179f4ec92515ca59d4bc7494fc73d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 30 Oct 2024 13:43:11 +0100 +Subject: ipvlan: Prepare ipvlan_process_v4_outbound() to future .flowi4_tos + conversion. + +From: Guillaume Nault + +[ Upstream commit 0c30d6eedd1ec0c1382bcab9576d26413cd278a3 ] + +Use ip4h_dscp() to get the DSCP from the IPv4 header, then convert the +dscp_t value to __u8 with inet_dscp_to_dsfield(). + +Then, when we'll convert .flowi4_tos to dscp_t, we'll just have to drop +the inet_dscp_to_dsfield() call. + +Signed-off-by: Guillaume Nault +Reviewed-by: Ido Schimmel +Link: https://patch.msgid.link/f48335504a05b3587e0081a9b4511e0761571ca5.1730292157.git.gnault@redhat.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part") +Signed-off-by: Sasha Levin +--- + drivers/net/ipvlan/ipvlan_core.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c +index d22a705ac4d6f..38eb40cba5aac 100644 +--- a/drivers/net/ipvlan/ipvlan_core.c ++++ b/drivers/net/ipvlan/ipvlan_core.c +@@ -3,6 +3,7 @@ + */ + + #include ++#include + + #include "ipvlan.h" + +@@ -422,7 +423,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) + int err, ret = NET_XMIT_DROP; + struct flowi4 fl4 = { + .flowi4_oif = dev->ifindex, +- .flowi4_tos = ip4h->tos & INET_DSCP_MASK, ++ .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)), + .flowi4_flags = FLOWI_FLAG_ANYSRC, + .flowi4_mark = skb->mark, + .daddr = ip4h->daddr, +-- +2.39.5 + diff --git 
a/queue-6.1/ipvlan-unmask-upper-dscp-bits-in-ipvlan_process_v4_o.patch b/queue-6.1/ipvlan-unmask-upper-dscp-bits-in-ipvlan_process_v4_o.patch new file mode 100644 index 0000000000..dfa2b5e0d9 --- /dev/null +++ b/queue-6.1/ipvlan-unmask-upper-dscp-bits-in-ipvlan_process_v4_o.patch @@ -0,0 +1,47 @@ +From a4bb177add982cea591ed6292be1bbf19ef27c4b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Aug 2024 09:54:57 +0300 +Subject: ipvlan: Unmask upper DSCP bits in ipvlan_process_v4_outbound() + +From: Ido Schimmel + +[ Upstream commit 939cd1abf080c629552a9c5e6db4c0509d13e4c7 ] + +Unmask the upper DSCP bits when calling ip_route_output_flow() so that +in the future it could perform the FIB lookup according to the full DSCP +value. + +Signed-off-by: Ido Schimmel +Reviewed-by: Guillaume Nault +Signed-off-by: David S. Miller +Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part") +Signed-off-by: Sasha Levin +--- + drivers/net/ipvlan/ipvlan_core.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c +index 1d49771d07f4c..d22a705ac4d6f 100644 +--- a/drivers/net/ipvlan/ipvlan_core.c ++++ b/drivers/net/ipvlan/ipvlan_core.c +@@ -2,6 +2,8 @@ + /* Copyright (c) 2014 Mahesh Bandewar + */ + ++#include ++ + #include "ipvlan.h" + + static u32 ipvlan_jhash_secret __read_mostly; +@@ -420,7 +422,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) + int err, ret = NET_XMIT_DROP; + struct flowi4 fl4 = { + .flowi4_oif = dev->ifindex, +- .flowi4_tos = RT_TOS(ip4h->tos), ++ .flowi4_tos = ip4h->tos & INET_DSCP_MASK, + .flowi4_flags = FLOWI_FLAG_ANYSRC, + .flowi4_mark = skb->mark, + .daddr = ip4h->daddr, +-- +2.39.5 + diff --git a/queue-6.1/ipvs-always-clear-ipvs_property-flag-in-skb_scrub_pa.patch b/queue-6.1/ipvs-always-clear-ipvs_property-flag-in-skb_scrub_pa.patch new file mode 100644 index 0000000000..d1c8283dd3 --- /dev/null +++ 
b/queue-6.1/ipvs-always-clear-ipvs_property-flag-in-skb_scrub_pa.patch @@ -0,0 +1,51 @@ +From 01fa3b1054a6e520434a3ca14645c1fd41cb79e2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 22 Feb 2025 11:35:18 +0800 +Subject: ipvs: Always clear ipvs_property flag in skb_scrub_packet() + +From: Philo Lu + +[ Upstream commit de2c211868b9424f9aa9b3432c4430825bafb41b ] + +We found an issue when using bpf_redirect with ipvs NAT mode after +commit ff70202b2d1a ("dev_forward_skb: do not scrub skb mark within +the same name space"). Particularly, we use bpf_redirect to return +the skb directly back to the netif it comes from, i.e., xnet is +false in skb_scrub_packet(), and then ipvs_property is preserved +and SNAT is skipped in the rx path. + +ipvs_property has been already cleared when netns is changed in +commit 2b5ec1a5f973 ("netfilter/ipvs: clear ipvs_property flag when +SKB net namespace changed"). This patch just clears it in spite of +netns. + +Fixes: 2b5ec1a5f973 ("netfilter/ipvs: clear ipvs_property flag when SKB net namespace changed") +Signed-off-by: Philo Lu +Acked-by: Julian Anastasov +Link: https://patch.msgid.link/20250222033518.126087-1-lulie@linux.alibaba.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/core/skbuff.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 768b8d65a5baa..d8a3ada886ffb 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -5556,11 +5556,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) + skb->offload_fwd_mark = 0; + skb->offload_l3_fwd_mark = 0; + #endif ++ ipvs_reset(skb); + + if (!xnet) + return; + +- ipvs_reset(skb); + skb->mark = 0; + skb_clear_tstamp(skb); + } +-- +2.39.5 + diff --git a/queue-6.1/net-cadence-macb-synchronize-stats-calculations.patch b/queue-6.1/net-cadence-macb-synchronize-stats-calculations.patch new file mode 100644 index 0000000000..f129e81799 --- /dev/null +++ 
b/queue-6.1/net-cadence-macb-synchronize-stats-calculations.patch @@ -0,0 +1,115 @@ +From efed222a32805570471b63c78320419d1f7a9c8b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Feb 2025 11:29:50 -0500 +Subject: net: cadence: macb: Synchronize stats calculations + +From: Sean Anderson + +[ Upstream commit fa52f15c745ce55261b92873676f64f7348cfe82 ] + +Stats calculations involve a RMW to add the stat update to the existing +value. This is currently not protected by any synchronization mechanism, +so data races are possible. Add a spinlock to protect the update. The +reader side could be protected using u64_stats, but we would still need +a spinlock for the update side anyway. And we always do an update +immediately before reading the stats anyway. + +Fixes: 89e5785fc8a6 ("[PATCH] Atmel MACB ethernet driver") +Signed-off-by: Sean Anderson +Link: https://patch.msgid.link/20250220162950.95941-1-sean.anderson@linux.dev +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/cadence/macb.h | 2 ++ + drivers/net/ethernet/cadence/macb_main.c | 12 ++++++++++-- + 2 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h +index 1aa578c1ca4ad..8d66de71ea604 100644 +--- a/drivers/net/ethernet/cadence/macb.h ++++ b/drivers/net/ethernet/cadence/macb.h +@@ -1271,6 +1271,8 @@ struct macb { + struct clk *rx_clk; + struct clk *tsu_clk; + struct net_device *dev; ++ /* Protects hw_stats and ethtool_stats */ ++ spinlock_t stats_lock; + union { + struct macb_stats macb; + struct gem_stats gem; +diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c +index d44d53d697620..fc3342944dbcc 100644 +--- a/drivers/net/ethernet/cadence/macb_main.c ++++ b/drivers/net/ethernet/cadence/macb_main.c +@@ -1936,10 +1936,12 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) + + if (status & MACB_BIT(ISR_ROVR)) { + /* We missed at 
least one packet */ ++ spin_lock(&bp->stats_lock); + if (macb_is_gem(bp)) + bp->hw_stats.gem.rx_overruns++; + else + bp->hw_stats.macb.rx_overruns++; ++ spin_unlock(&bp->stats_lock); + + if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) + queue_writel(queue, ISR, MACB_BIT(ISR_ROVR)); +@@ -2999,6 +3001,7 @@ static struct net_device_stats *gem_get_stats(struct macb *bp) + if (!netif_running(bp->dev)) + return nstat; + ++ spin_lock_irq(&bp->stats_lock); + gem_update_stats(bp); + + nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors + +@@ -3028,6 +3031,7 @@ static struct net_device_stats *gem_get_stats(struct macb *bp) + nstat->tx_aborted_errors = hwstat->tx_excessive_collisions; + nstat->tx_carrier_errors = hwstat->tx_carrier_sense_errors; + nstat->tx_fifo_errors = hwstat->tx_underrun; ++ spin_unlock_irq(&bp->stats_lock); + + return nstat; + } +@@ -3035,12 +3039,13 @@ static struct net_device_stats *gem_get_stats(struct macb *bp) + static void gem_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) + { +- struct macb *bp; ++ struct macb *bp = netdev_priv(dev); + +- bp = netdev_priv(dev); ++ spin_lock_irq(&bp->stats_lock); + gem_update_stats(bp); + memcpy(data, &bp->ethtool_stats, sizeof(u64) + * (GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES)); ++ spin_unlock_irq(&bp->stats_lock); + } + + static int gem_get_sset_count(struct net_device *dev, int sset) +@@ -3090,6 +3095,7 @@ static struct net_device_stats *macb_get_stats(struct net_device *dev) + return gem_get_stats(bp); + + /* read stats from hardware */ ++ spin_lock_irq(&bp->stats_lock); + macb_update_stats(bp); + + /* Convert HW stats into netdevice stats */ +@@ -3123,6 +3129,7 @@ static struct net_device_stats *macb_get_stats(struct net_device *dev) + nstat->tx_carrier_errors = hwstat->tx_carrier_errors; + nstat->tx_fifo_errors = hwstat->tx_underruns; + /* Don't know about heartbeat or window errors... 
*/ ++ spin_unlock_irq(&bp->stats_lock); + + return nstat; + } +@@ -4949,6 +4956,7 @@ static int macb_probe(struct platform_device *pdev) + bp->usrio = macb_config->usrio; + + spin_lock_init(&bp->lock); ++ spin_lock_init(&bp->stats_lock); + + /* setup capabilities */ + macb_configure_caps(bp, macb_config); +-- +2.39.5 + diff --git a/queue-6.1/net-clear-old-fragment-checksum-value-in-napi_reuse_.patch b/queue-6.1/net-clear-old-fragment-checksum-value-in-napi_reuse_.patch new file mode 100644 index 0000000000..b66cb45125 --- /dev/null +++ b/queue-6.1/net-clear-old-fragment-checksum-value-in-napi_reuse_.patch @@ -0,0 +1,77 @@ +From a8a59c84507057bcdeecb41267c8b25b8a3f0351 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Feb 2025 13:28:52 +0200 +Subject: net: Clear old fragment checksum value in napi_reuse_skb + +From: Mohammad Heib + +[ Upstream commit 49806fe6e61b045b5be8610e08b5a3083c109aa0 ] + +In certain cases, napi_get_frags() returns an skb that points to an old +received fragment. This skb may have its skb->ip_summed, csum, and other +fields set from previous fragment handling. + +Some network drivers set skb->ip_summed to either CHECKSUM_COMPLETE or +CHECKSUM_UNNECESSARY when getting skb from napi_get_frags(), while +others only set skb->ip_summed when RX checksum offload is enabled on +the device, and do not set any value for skb->ip_summed when hardware +checksum offload is disabled, assuming that the skb->ip_summed +initiated to zero by napi_reuse_skb, ionic driver for example will +ignore/unset any value for the ip_summed field if HW checksum offload is +disabled, and if we have a situation where the user disables the +checksum offload during a traffic that could lead to the following +errors shown in the kernel logs: + +dump_stack_lvl+0x34/0x48 + __skb_gro_checksum_complete+0x7e/0x90 +tcp6_gro_receive+0xc6/0x190 +ipv6_gro_receive+0x1ec/0x430 +dev_gro_receive+0x188/0x360 +? ionic_rx_clean+0x25a/0x460 [ionic] +napi_gro_frags+0x13c/0x300 +?
__pfx_ionic_rx_service+0x10/0x10 [ionic] +ionic_rx_service+0x67/0x80 [ionic] +ionic_cq_service+0x58/0x90 [ionic] +ionic_txrx_napi+0x64/0x1b0 [ionic] + __napi_poll+0x27/0x170 +net_rx_action+0x29c/0x370 +handle_softirqs+0xce/0x270 +__irq_exit_rcu+0xa3/0xc0 +common_interrupt+0x80/0xa0 + + +This inconsistency sometimes leads to checksum validation issues in the +upper layers of the network stack. + +To resolve this, this patch clears the skb->ip_summed value for each +reused skb in by napi_reuse_skb(), ensuring that the caller is responsible +for setting the correct checksum status. This eliminates potential +checksum validation issues caused by improper handling of +skb->ip_summed. + +Fixes: 76620aafd66f ("gro: New frags interface to avoid copying shinfo") +Signed-off-by: Mohammad Heib +Reviewed-by: Shannon Nelson +Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/20250225112852.2507709-1-mheib@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/gro.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/core/gro.c b/net/core/gro.c +index 47118e97ecfdd..c4cbf398c5f78 100644 +--- a/net/core/gro.c ++++ b/net/core/gro.c +@@ -679,6 +679,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) + skb->pkt_type = PACKET_HOST; + + skb->encapsulation = 0; ++ skb->ip_summed = CHECKSUM_NONE; + skb_shinfo(skb)->gso_type = 0; + skb_shinfo(skb)->gso_size = 0; + if (unlikely(skb->slow_gro)) { +-- +2.39.5 + diff --git a/queue-6.1/net-ipv4-add-tracepoint-for-icmp_send.patch b/queue-6.1/net-ipv4-add-tracepoint-for-icmp_send.patch new file mode 100644 index 0000000000..746b5abe4c --- /dev/null +++ b/queue-6.1/net-ipv4-add-tracepoint-for-icmp_send.patch @@ -0,0 +1,153 @@ +From d4efcf4743e3568a179f36d9fc4c98b970b38ef3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 May 2024 15:41:03 +0800 +Subject: net/ipv4: add tracepoint for icmp_send + +From: Peilin He + +[ Upstream commit 
db3efdcf70c752e8a8deb16071d8e693c3ef8746 ] + +Introduce a tracepoint for icmp_send, which can help users to get more +detail information conveniently when icmp abnormal events happen. + +1. Giving an usecase example: +============================= +When an application experiences packet loss due to an unreachable UDP +destination port, the kernel will send an exception message through the +icmp_send function. By adding a trace point for icmp_send, developers or +system administrators can obtain detailed information about the UDP +packet loss, including the type, code, source address, destination address, +source port, and destination port. This facilitates the trouble-shooting +of UDP packet loss issues especially for those network-service +applications. + +2. Operation Instructions: +========================== +Switch to the tracing directory. + cd /sys/kernel/tracing +Filter for destination port unreachable. + echo "type==3 && code==3" > events/icmp/icmp_send/filter +Enable trace event. + echo 1 > events/icmp/icmp_send/enable + +3. Result View: +================ + udp_client_erro-11370 [002] ...s.12 124.728002: + icmp_send: icmp_send: type=3, code=3. + From 127.0.0.1:41895 to 127.0.0.1:6666 ulen=23 + skbaddr=00000000589b167a + +Signed-off-by: Peilin He +Signed-off-by: xu xin +Reviewed-by: Yunkai Zhang +Cc: Yang Yang +Cc: Liu Chun +Cc: Xuexin Jiang +Reviewed-by: Steven Rostedt (Google) +Signed-off-by: David S. 
Miller +Stable-dep-of: 27843ce6ba3d ("ipvlan: ensure network headers are in skb linear part") +Signed-off-by: Sasha Levin +--- + include/trace/events/icmp.h | 67 +++++++++++++++++++++++++++++++++++++ + net/ipv4/icmp.c | 4 +++ + 2 files changed, 71 insertions(+) + create mode 100644 include/trace/events/icmp.h + +diff --git a/include/trace/events/icmp.h b/include/trace/events/icmp.h +new file mode 100644 +index 0000000000000..31559796949a7 +--- /dev/null ++++ b/include/trace/events/icmp.h +@@ -0,0 +1,67 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM icmp ++ ++#if !defined(_TRACE_ICMP_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_ICMP_H ++ ++#include ++#include ++ ++TRACE_EVENT(icmp_send, ++ ++ TP_PROTO(const struct sk_buff *skb, int type, int code), ++ ++ TP_ARGS(skb, type, code), ++ ++ TP_STRUCT__entry( ++ __field(const void *, skbaddr) ++ __field(int, type) ++ __field(int, code) ++ __array(__u8, saddr, 4) ++ __array(__u8, daddr, 4) ++ __field(__u16, sport) ++ __field(__u16, dport) ++ __field(unsigned short, ulen) ++ ), ++ ++ TP_fast_assign( ++ struct iphdr *iph = ip_hdr(skb); ++ struct udphdr *uh = udp_hdr(skb); ++ int proto_4 = iph->protocol; ++ __be32 *p32; ++ ++ __entry->skbaddr = skb; ++ __entry->type = type; ++ __entry->code = code; ++ ++ if (proto_4 != IPPROTO_UDP || (u8 *)uh < skb->head || ++ (u8 *)uh + sizeof(struct udphdr) ++ > skb_tail_pointer(skb)) { ++ __entry->sport = 0; ++ __entry->dport = 0; ++ __entry->ulen = 0; ++ } else { ++ __entry->sport = ntohs(uh->source); ++ __entry->dport = ntohs(uh->dest); ++ __entry->ulen = ntohs(uh->len); ++ } ++ ++ p32 = (__be32 *) __entry->saddr; ++ *p32 = iph->saddr; ++ ++ p32 = (__be32 *) __entry->daddr; ++ *p32 = iph->daddr; ++ ), ++ ++ TP_printk("icmp_send: type=%d, code=%d. 
From %pI4:%u to %pI4:%u ulen=%d skbaddr=%p", ++ __entry->type, __entry->code, ++ __entry->saddr, __entry->sport, __entry->daddr, ++ __entry->dport, __entry->ulen, __entry->skbaddr) ++); ++ ++#endif /* _TRACE_ICMP_H */ ++ ++/* This part must be outside protection */ ++#include ++ +diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c +index a21d32b3ae6c3..b05fa424ad5ce 100644 +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -93,6 +93,8 @@ + #include + #include + #include ++#define CREATE_TRACE_POINTS ++#include + + /* + * Build xmit assembly blocks +@@ -778,6 +780,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + if (!fl4.saddr) + fl4.saddr = htonl(INADDR_DUMMY); + ++ trace_icmp_send(skb_in, type, code); ++ + icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt); + ende: + ip_rt_put(rt); +-- +2.39.5 + diff --git a/queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-rpl-lwt.patch b/queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-rpl-lwt.patch new file mode 100644 index 0000000000..9c297a9561 --- /dev/null +++ b/queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-rpl-lwt.patch @@ -0,0 +1,60 @@ +From 0dcd93ff196729cdfcad0a1a07b22d2acf2066b5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Feb 2025 18:51:39 +0100 +Subject: net: ipv6: fix dst ref loop on input in rpl lwt + +From: Justin Iurman + +[ Upstream commit 13e55fbaec176119cff68a7e1693b251c8883c5f ] + +Prevent a dst ref loop on input in rpl_iptunnel. 
+ +Fixes: a7a29f9c361f ("net: ipv6: add rpl sr tunnel") +Cc: Alexander Aring +Cc: Ido Schimmel +Reviewed-by: Ido Schimmel +Signed-off-by: Justin Iurman +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/ipv6/rpl_iptunnel.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c +index 69b9bd90140dd..862ac1e2e191c 100644 +--- a/net/ipv6/rpl_iptunnel.c ++++ b/net/ipv6/rpl_iptunnel.c +@@ -259,10 +259,18 @@ static int rpl_input(struct sk_buff *skb) + { + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; ++ struct lwtunnel_state *lwtst; + struct rpl_lwt *rlwt; + int err; + +- rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate); ++ /* We cannot dereference "orig_dst" once ip6_route_input() or ++ * skb_dst_drop() is called. However, in order to detect a dst loop, we ++ * need the address of its lwtstate. So, save the address of lwtstate ++ * now and use it later as a comparison. ++ */ ++ lwtst = orig_dst->lwtstate; ++ ++ rlwt = rpl_lwt_lwtunnel(lwtst); + + local_bh_disable(); + dst = dst_cache_get(&rlwt->cache); +@@ -277,7 +285,9 @@ static int rpl_input(struct sk_buff *skb) + if (!dst) { + ip6_route_input(skb); + dst = skb_dst(skb); +- if (!dst->error) { ++ ++ /* cache only if we don't create a dst reference loop */ ++ if (!dst->error && lwtst != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&rlwt->cache, dst, + &ipv6_hdr(skb)->saddr); +-- +2.39.5 + diff --git a/queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-seg6-lwt.patch b/queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-seg6-lwt.patch new file mode 100644 index 0000000000..856ebf0c92 --- /dev/null +++ b/queue-6.1/net-ipv6-fix-dst-ref-loop-on-input-in-seg6-lwt.patch @@ -0,0 +1,60 @@ +From 930eea7dd45aac86e88ea450e3a5be502d409592 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Feb 2025 18:51:38 +0100 +Subject: net: ipv6: fix dst ref loop on input in seg6 lwt + +From: Justin Iurman + +[ 
Upstream commit c64a0727f9b1cbc63a5538c8c0014e9a175ad864 ] + +Prevent a dst ref loop on input in seg6_iptunnel. + +Fixes: af4a2209b134 ("ipv6: sr: use dst_cache in seg6_input") +Cc: David Lebrun +Cc: Ido Schimmel +Reviewed-by: Ido Schimmel +Signed-off-by: Justin Iurman +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/ipv6/seg6_iptunnel.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c +index c161298c8b335..b186d85ec5b3f 100644 +--- a/net/ipv6/seg6_iptunnel.c ++++ b/net/ipv6/seg6_iptunnel.c +@@ -472,10 +472,18 @@ static int seg6_input_core(struct net *net, struct sock *sk, + { + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; ++ struct lwtunnel_state *lwtst; + struct seg6_lwt *slwt; + int err; + +- slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); ++ /* We cannot dereference "orig_dst" once ip6_route_input() or ++ * skb_dst_drop() is called. However, in order to detect a dst loop, we ++ * need the address of its lwtstate. So, save the address of lwtstate ++ * now and use it later as a comparison. 
++ */ ++ lwtst = orig_dst->lwtstate; ++ ++ slwt = seg6_lwt_lwtunnel(lwtst); + + local_bh_disable(); + dst = dst_cache_get(&slwt->cache); +@@ -490,7 +498,9 @@ static int seg6_input_core(struct net *net, struct sock *sk, + if (!dst) { + ip6_route_input(skb); + dst = skb_dst(skb); +- if (!dst->error) { ++ ++ /* cache only if we don't create a dst reference loop */ ++ if (!dst->error && lwtst != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&slwt->cache, dst, + &ipv6_hdr(skb)->saddr); +-- +2.39.5 + diff --git a/queue-6.1/net-ipv6-rpl_iptunnel-mitigate-2-realloc-issue.patch b/queue-6.1/net-ipv6-rpl_iptunnel-mitigate-2-realloc-issue.patch new file mode 100644 index 0000000000..35e4422ef5 --- /dev/null +++ b/queue-6.1/net-ipv6-rpl_iptunnel-mitigate-2-realloc-issue.patch @@ -0,0 +1,155 @@ +From 06016721218287aa40735e5ef106176afd3ef85a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Dec 2024 13:49:45 +0100 +Subject: net: ipv6: rpl_iptunnel: mitigate 2-realloc issue + +From: Justin Iurman + +[ Upstream commit 985ec6f5e6235242191370628acb73d7a9f0c0ea ] + +This patch mitigates the two-reallocations issue with rpl_iptunnel by +providing the dst_entry (in the cache) to the first call to +skb_cow_head(). As a result, the very first iteration would still +trigger two reallocations (i.e., empty cache), while next iterations +would only trigger a single reallocation. 
+ +Performance tests before/after applying this patch, which clearly shows +there is no impact (it even shows improvement): +- before: https://ibb.co/nQJhqwc +- after: https://ibb.co/4ZvW6wV + +Signed-off-by: Justin Iurman +Cc: Alexander Aring +Signed-off-by: Paolo Abeni +Stable-dep-of: 13e55fbaec17 ("net: ipv6: fix dst ref loop on input in rpl lwt") +Signed-off-by: Sasha Levin +--- + net/ipv6/rpl_iptunnel.c | 46 ++++++++++++++++++++++------------------- + 1 file changed, 25 insertions(+), 21 deletions(-) + +diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c +index c1d0f947a7c87..69b9bd90140dd 100644 +--- a/net/ipv6/rpl_iptunnel.c ++++ b/net/ipv6/rpl_iptunnel.c +@@ -125,7 +125,8 @@ static void rpl_destroy_state(struct lwtunnel_state *lwt) + } + + static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, +- const struct ipv6_rpl_sr_hdr *srh) ++ const struct ipv6_rpl_sr_hdr *srh, ++ struct dst_entry *cache_dst) + { + struct ipv6_rpl_sr_hdr *isrh, *csrh; + const struct ipv6hdr *oldhdr; +@@ -153,7 +154,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, + + hdrlen = ((csrh->hdrlen + 1) << 3); + +- err = skb_cow_head(skb, hdrlen + skb->mac_len); ++ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb)); + if (unlikely(err)) { + kfree(buf); + return err; +@@ -186,7 +187,8 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, + return 0; + } + +-static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt) ++static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt, ++ struct dst_entry *cache_dst) + { + struct dst_entry *dst = skb_dst(skb); + struct rpl_iptunnel_encap *tinfo; +@@ -196,7 +198,7 @@ static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt) + + tinfo = rpl_encap_lwtunnel(dst->lwtstate); + +- return rpl_do_srh_inline(skb, rlwt, tinfo->srh); ++ return rpl_do_srh_inline(skb, rlwt, tinfo->srh, cache_dst); + } + + static int 
rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) +@@ -208,14 +210,14 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) + + rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate); + +- err = rpl_do_srh(skb, rlwt); +- if (unlikely(err)) +- goto drop; +- + local_bh_disable(); + dst = dst_cache_get(&rlwt->cache); + local_bh_enable(); + ++ err = rpl_do_srh(skb, rlwt, dst); ++ if (unlikely(err)) ++ goto drop; ++ + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; +@@ -237,15 +239,15 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) + local_bh_disable(); + dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); + local_bh_enable(); ++ ++ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); ++ if (unlikely(err)) ++ goto drop; + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + +- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); +- if (unlikely(err)) +- goto drop; +- + return dst_output(net, sk, skb); + + drop: +@@ -262,12 +264,13 @@ static int rpl_input(struct sk_buff *skb) + + rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate); + +- err = rpl_do_srh(skb, rlwt); +- if (unlikely(err)) +- goto drop; +- + local_bh_disable(); + dst = dst_cache_get(&rlwt->cache); ++ local_bh_enable(); ++ ++ err = rpl_do_srh(skb, rlwt, dst); ++ if (unlikely(err)) ++ goto drop; + + skb_dst_drop(skb); + +@@ -275,17 +278,18 @@ static int rpl_input(struct sk_buff *skb) + ip6_route_input(skb); + dst = skb_dst(skb); + if (!dst->error) { ++ local_bh_disable(); + dst_cache_set_ip6(&rlwt->cache, dst, + &ipv6_hdr(skb)->saddr); ++ local_bh_enable(); + } ++ ++ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); ++ if (unlikely(err)) ++ goto drop; + } else { + skb_dst_set(skb, dst); + } +- local_bh_enable(); +- +- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); +- if (unlikely(err)) +- goto drop; + + return dst_input(skb); + +-- +2.39.5 + diff --git 
a/queue-6.1/net-ipv6-seg6_iptunnel-mitigate-2-realloc-issue.patch b/queue-6.1/net-ipv6-seg6_iptunnel-mitigate-2-realloc-issue.patch new file mode 100644 index 0000000000..830cc9488a --- /dev/null +++ b/queue-6.1/net-ipv6-seg6_iptunnel-mitigate-2-realloc-issue.patch @@ -0,0 +1,255 @@ +From ce40c0dcf493d7ab3793ef8271dd62197a32bc24 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Dec 2024 13:49:44 +0100 +Subject: net: ipv6: seg6_iptunnel: mitigate 2-realloc issue + +From: Justin Iurman + +[ Upstream commit 40475b63761abb6f8fdef960d03228a08662c9c4 ] + +This patch mitigates the two-reallocations issue with seg6_iptunnel by +providing the dst_entry (in the cache) to the first call to +skb_cow_head(). As a result, the very first iteration would still +trigger two reallocations (i.e., empty cache), while next iterations +would only trigger a single reallocation. + +Performance tests before/after applying this patch, which clearly shows +the improvement: +- before: https://ibb.co/3Cg4sNH +- after: https://ibb.co/8rQ350r + +Signed-off-by: Justin Iurman +Cc: David Lebrun +Signed-off-by: Paolo Abeni +Stable-dep-of: c64a0727f9b1 ("net: ipv6: fix dst ref loop on input in seg6 lwt") +Signed-off-by: Sasha Levin +--- + net/ipv6/seg6_iptunnel.c | 85 ++++++++++++++++++++++++---------------- + 1 file changed, 52 insertions(+), 33 deletions(-) + +diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c +index ae5299c277bcf..c161298c8b335 100644 +--- a/net/ipv6/seg6_iptunnel.c ++++ b/net/ipv6/seg6_iptunnel.c +@@ -124,8 +124,8 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb, + return flowlabel; + } + +-/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ +-int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) ++static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, ++ int proto, struct dst_entry *cache_dst) + { + struct dst_entry *dst = skb_dst(skb); + struct net *net = 
dev_net(dst->dev); +@@ -137,7 +137,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) + hdrlen = (osrh->hdrlen + 1) << 3; + tot_len = hdrlen + sizeof(*hdr); + +- err = skb_cow_head(skb, tot_len + skb->mac_len); ++ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb)); + if (unlikely(err)) + return err; + +@@ -197,11 +197,18 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) + + return 0; + } ++ ++/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ ++int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) ++{ ++ return __seg6_do_srh_encap(skb, osrh, proto, NULL); ++} + EXPORT_SYMBOL_GPL(seg6_do_srh_encap); + + /* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */ + static int seg6_do_srh_encap_red(struct sk_buff *skb, +- struct ipv6_sr_hdr *osrh, int proto) ++ struct ipv6_sr_hdr *osrh, int proto, ++ struct dst_entry *cache_dst) + { + __u8 first_seg = osrh->first_segment; + struct dst_entry *dst = skb_dst(skb); +@@ -230,7 +237,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, + + tot_len = red_hdrlen + sizeof(struct ipv6hdr); + +- err = skb_cow_head(skb, tot_len + skb->mac_len); ++ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb)); + if (unlikely(err)) + return err; + +@@ -317,8 +324,8 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, + return 0; + } + +-/* insert an SRH within an IPv6 packet, just after the IPv6 header */ +-int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) ++static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, ++ struct dst_entry *cache_dst) + { + struct ipv6hdr *hdr, *oldhdr; + struct ipv6_sr_hdr *isrh; +@@ -326,7 +333,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) + + hdrlen = (osrh->hdrlen + 1) << 3; + +- err = skb_cow_head(skb, hdrlen + skb->mac_len); ++ err = skb_cow_head(skb, hdrlen + 
dst_dev_overhead(cache_dst, skb)); + if (unlikely(err)) + return err; + +@@ -369,9 +376,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) + + return 0; + } +-EXPORT_SYMBOL_GPL(seg6_do_srh_inline); + +-static int seg6_do_srh(struct sk_buff *skb) ++static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst) + { + struct dst_entry *dst = skb_dst(skb); + struct seg6_iptunnel_encap *tinfo; +@@ -384,7 +390,7 @@ static int seg6_do_srh(struct sk_buff *skb) + if (skb->protocol != htons(ETH_P_IPV6)) + return -EINVAL; + +- err = seg6_do_srh_inline(skb, tinfo->srh); ++ err = __seg6_do_srh_inline(skb, tinfo->srh, cache_dst); + if (err) + return err; + break; +@@ -402,9 +408,11 @@ static int seg6_do_srh(struct sk_buff *skb) + return -EINVAL; + + if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP) +- err = seg6_do_srh_encap(skb, tinfo->srh, proto); ++ err = __seg6_do_srh_encap(skb, tinfo->srh, ++ proto, cache_dst); + else +- err = seg6_do_srh_encap_red(skb, tinfo->srh, proto); ++ err = seg6_do_srh_encap_red(skb, tinfo->srh, ++ proto, cache_dst); + + if (err) + return err; +@@ -425,11 +433,13 @@ static int seg6_do_srh(struct sk_buff *skb) + skb_push(skb, skb->mac_len); + + if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP) +- err = seg6_do_srh_encap(skb, tinfo->srh, +- IPPROTO_ETHERNET); ++ err = __seg6_do_srh_encap(skb, tinfo->srh, ++ IPPROTO_ETHERNET, ++ cache_dst); + else + err = seg6_do_srh_encap_red(skb, tinfo->srh, +- IPPROTO_ETHERNET); ++ IPPROTO_ETHERNET, ++ cache_dst); + + if (err) + return err; +@@ -444,6 +454,13 @@ static int seg6_do_srh(struct sk_buff *skb) + return 0; + } + ++/* insert an SRH within an IPv6 packet, just after the IPv6 header */ ++int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) ++{ ++ return __seg6_do_srh_inline(skb, osrh, NULL); ++} ++EXPORT_SYMBOL_GPL(seg6_do_srh_inline); ++ + static int seg6_input_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) + { +@@ -458,14 +475,15 @@ static int 
seg6_input_core(struct net *net, struct sock *sk, + struct seg6_lwt *slwt; + int err; + +- err = seg6_do_srh(skb); +- if (unlikely(err)) +- goto drop; +- + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + + local_bh_disable(); + dst = dst_cache_get(&slwt->cache); ++ local_bh_enable(); ++ ++ err = seg6_do_srh(skb, dst); ++ if (unlikely(err)) ++ goto drop; + + skb_dst_drop(skb); + +@@ -473,17 +491,18 @@ static int seg6_input_core(struct net *net, struct sock *sk, + ip6_route_input(skb); + dst = skb_dst(skb); + if (!dst->error) { ++ local_bh_disable(); + dst_cache_set_ip6(&slwt->cache, dst, + &ipv6_hdr(skb)->saddr); ++ local_bh_enable(); + } ++ ++ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); ++ if (unlikely(err)) ++ goto drop; + } else { + skb_dst_set(skb, dst); + } +- local_bh_enable(); +- +- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); +- if (unlikely(err)) +- goto drop; + + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, +@@ -529,16 +548,16 @@ static int seg6_output_core(struct net *net, struct sock *sk, + struct seg6_lwt *slwt; + int err; + +- err = seg6_do_srh(skb); +- if (unlikely(err)) +- goto drop; +- + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + + local_bh_disable(); + dst = dst_cache_get(&slwt->cache); + local_bh_enable(); + ++ err = seg6_do_srh(skb, dst); ++ if (unlikely(err)) ++ goto drop; ++ + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; +@@ -560,15 +579,15 @@ static int seg6_output_core(struct net *net, struct sock *sk, + local_bh_disable(); + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); + local_bh_enable(); ++ ++ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); ++ if (unlikely(err)) ++ goto drop; + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + +- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); +- if (unlikely(err)) +- goto drop; +- + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return 
NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, + NULL, skb_dst(skb)->dev, dst_output); +-- +2.39.5 + diff --git a/queue-6.1/net-loopback-avoid-sending-ip-packets-without-an-eth.patch b/queue-6.1/net-loopback-avoid-sending-ip-packets-without-an-eth.patch new file mode 100644 index 0000000000..eb617301e9 --- /dev/null +++ b/queue-6.1/net-loopback-avoid-sending-ip-packets-without-an-eth.patch @@ -0,0 +1,94 @@ +From ec4f8ab14ccc843b21181b75e9f9b0f454be3f8a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Feb 2025 09:25:59 +0200 +Subject: net: loopback: Avoid sending IP packets without an Ethernet header + +From: Ido Schimmel + +[ Upstream commit 0e4427f8f587c4b603475468bb3aee9418574893 ] + +After commit 22600596b675 ("ipv4: give an IPv4 dev to blackhole_netdev") +IPv4 neighbors can be constructed on the blackhole net device, but they +are constructed with an output function (neigh_direct_output()) that +simply calls dev_queue_xmit(). The latter will transmit packets via +'skb->dev' which might not be the blackhole net device if dst_dev_put() +switched 'dst->dev' to the blackhole net device while another CPU was +using the dst entry in ip_output(), but after it already initialized +'skb->dev' from 'dst->dev'. + +Specifically, the following can happen: + + CPU1 CPU2 + +udp_sendmsg(sk1) udp_sendmsg(sk2) +udp_send_skb() [...] +ip_output() + skb->dev = skb_dst(skb)->dev + dst_dev_put() + dst->dev = blackhole_netdev +ip_finish_output2() + resolves neigh on dst->dev +neigh_output() +neigh_direct_output() +dev_queue_xmit() + +This will result in IPv4 packets being sent without an Ethernet header +via a valid net device: + +tcpdump: verbose output suppressed, use -v[v]... for full protocol decode +listening on enp9s0, link-type EN10MB (Ethernet), snapshot length 262144 bytes +22:07:02.329668 20:00:40:11:18:fb > 45:00:00:44:f4:94, ethertype Unknown +(0x58c6), length 68: + 0x0000: 8dda 74ca f1ae ca6c ca6c 0098 969c 0400 ..t....l.l...... 
+ 0x0010: 0000 4730 3f18 6800 0000 0000 0000 9971 ..G0?.h........q + 0x0020: c4c9 9055 a157 0a70 9ead bf83 38ca ab38 ...U.W.p....8..8 + 0x0030: 8add ab96 e052 .....R + +Fix by making sure that neighbors are constructed on top of the +blackhole net device with an output function that simply consumes the +packets, in a similar fashion to dst_discard_out() and +blackhole_netdev_xmit(). + +Fixes: 8d7017fd621d ("blackhole_netdev: use blackhole_netdev to invalidate dst entries") +Fixes: 22600596b675 ("ipv4: give an IPv4 dev to blackhole_netdev") +Reported-by: Florian Meister +Closes: https://lore.kernel.org/netdev/20250210084931.23a5c2e4@hermes.local/ +Signed-off-by: Ido Schimmel +Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/20250220072559.782296-1-idosch@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/loopback.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c +index 2e9742952c4e9..b213397672d22 100644 +--- a/drivers/net/loopback.c ++++ b/drivers/net/loopback.c +@@ -246,8 +246,22 @@ static netdev_tx_t blackhole_netdev_xmit(struct sk_buff *skb, + return NETDEV_TX_OK; + } + ++static int blackhole_neigh_output(struct neighbour *n, struct sk_buff *skb) ++{ ++ kfree_skb(skb); ++ return 0; ++} ++ ++static int blackhole_neigh_construct(struct net_device *dev, ++ struct neighbour *n) ++{ ++ n->output = blackhole_neigh_output; ++ return 0; ++} ++ + static const struct net_device_ops blackhole_netdev_ops = { + .ndo_start_xmit = blackhole_netdev_xmit, ++ .ndo_neigh_construct = blackhole_neigh_construct, + }; + + /* This is a dst-dummy device used specifically for invalidated +-- +2.39.5 + diff --git a/queue-6.1/net-mlx5-irq-fix-null-string-in-debug-print.patch b/queue-6.1/net-mlx5-irq-fix-null-string-in-debug-print.patch new file mode 100644 index 0000000000..9f57411a8c --- /dev/null +++ b/queue-6.1/net-mlx5-irq-fix-null-string-in-debug-print.patch @@ -0,0 
+1,42 @@ +From 9d779a68a89614f61d7795a6a03b3cf510a7dae8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Feb 2025 09:26:08 +0200 +Subject: net/mlx5: IRQ, Fix null string in debug print + +From: Shay Drory + +[ Upstream commit 2f5a6014eb168a97b24153adccfa663d3b282767 ] + +irq_pool_alloc() debug print can print a null string. +Fix it by providing a default string to print. + +Fixes: 71e084e26414 ("net/mlx5: Allocating a pool of MSI-X vectors for SFs") +Signed-off-by: Shay Drory +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202501141055.SwfIphN0-lkp@intel.com/ +Reviewed-by: Moshe Shemesh +Signed-off-by: Tariq Toukan +Reviewed-by: Kalesh AP +Link: https://patch.msgid.link/20250225072608.526866-4-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +index a6d3fc96e1685..10b9dc2aaf06f 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +@@ -513,7 +513,7 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, + pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ; + pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ; + mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d", +- name, size, start); ++ name ? 
name : "mlx5_pcif_pool", size, start); + return pool; + } + +-- +2.39.5 + diff --git a/queue-6.1/net-mvpp2-cls-fixed-non-ip-flow-with-vlan-tag-flow-d.patch b/queue-6.1/net-mvpp2-cls-fixed-non-ip-flow-with-vlan-tag-flow-d.patch new file mode 100644 index 0000000000..26dfb300f5 --- /dev/null +++ b/queue-6.1/net-mvpp2-cls-fixed-non-ip-flow-with-vlan-tag-flow-d.patch @@ -0,0 +1,40 @@ +From 0ab9b9d1325124e867cfa43f9acb1a500ed865b0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Feb 2025 20:20:58 -0800 +Subject: net: mvpp2: cls: Fixed Non IP flow, with vlan tag flow defination. + +From: Harshal Chaudhari + +[ Upstream commit 2d253726ff7106b39a44483b6864398bba8a2f74 ] + +Non IP flow, with vlan tag not working as expected while +running below command for vlan-priority. fixed that. + +ethtool -N eth1 flow-type ether vlan 0x8000 vlan-mask 0x1fff action 0 loc 0 + +Fixes: 1274daede3ef ("net: mvpp2: cls: Add steering based on vlan Id and priority.") +Signed-off-by: Harshal Chaudhari +Reviewed-by: Maxime Chevallier +Link: https://patch.msgid.link/20250225042058.2643838-1-hchaudhari@marvell.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +index 40aeaa7bd739f..d2757cc116139 100644 +--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c ++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +@@ -324,7 +324,7 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = { + MVPP2_PRS_RI_VLAN_MASK), + /* Non IP flow, with vlan tag */ + MVPP2_DEF_FLOW(MVPP22_FLOW_ETHERNET, MVPP2_FL_NON_IP_TAG, +- MVPP22_CLS_HEK_OPT_VLAN, ++ MVPP22_CLS_HEK_TAGGED, + 0, 0), + }; + +-- +2.39.5 + diff --git a/queue-6.1/net-set-the-minimum-for-net_hotdata.netdev_budget_us.patch b/queue-6.1/net-set-the-minimum-for-net_hotdata.netdev_budget_us.patch new file 
mode 100644 index 0000000000..e21277149f --- /dev/null +++ b/queue-6.1/net-set-the-minimum-for-net_hotdata.netdev_budget_us.patch @@ -0,0 +1,58 @@ +From b1d4a55ec10fb6d2c11e13f89f1816e027267147 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Feb 2025 12:07:52 +0100 +Subject: net: set the minimum for net_hotdata.netdev_budget_usecs + +From: Jiri Slaby (SUSE) + +[ Upstream commit c180188ec02281126045414e90d08422a80f75b4 ] + +Commit 7acf8a1e8a28 ("Replace 2 jiffies with sysctl netdev_budget_usecs +to enable softirq tuning") added a possibility to set +net_hotdata.netdev_budget_usecs, but added no lower bound checking. + +Commit a4837980fd9f ("net: revert default NAPI poll timeout to 2 jiffies") +made the *initial* value HZ-dependent, so the initial value is at least +2 jiffies even for lower HZ values (2 ms for 1000 Hz, 8ms for 250 Hz, 20 +ms for 100 Hz). + +But a user still can set improper values by a sysctl. Set .extra1 +(the lower bound) for net_hotdata.netdev_budget_usecs to the same value +as in the latter commit. That is to 2 jiffies. 
+ +Fixes: a4837980fd9f ("net: revert default NAPI poll timeout to 2 jiffies") +Fixes: 7acf8a1e8a28 ("Replace 2 jiffies with sysctl netdev_budget_usecs to enable softirq tuning") +Signed-off-by: Jiri Slaby (SUSE) +Cc: Dmitry Yakunin +Cc: Konstantin Khlebnikov +Link: https://patch.msgid.link/20250220110752.137639-1-jirislaby@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/sysctl_net_core.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c +index 47ca6d3ddbb56..75efc712bb9bc 100644 +--- a/net/core/sysctl_net_core.c ++++ b/net/core/sysctl_net_core.c +@@ -30,6 +30,7 @@ static int min_sndbuf = SOCK_MIN_SNDBUF; + static int min_rcvbuf = SOCK_MIN_RCVBUF; + static int max_skb_frags = MAX_SKB_FRAGS; + static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; ++static int netdev_budget_usecs_min = 2 * USEC_PER_SEC / HZ; + + static int net_msg_warn; /* Unused, but still a sysctl */ + +@@ -554,7 +555,7 @@ static struct ctl_table net_core_table[] = { + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, +- .extra1 = SYSCTL_ZERO, ++ .extra1 = &netdev_budget_usecs_min, + }, + { + .procname = "fb_tunnels_only_for_init_net", +-- +2.39.5 + diff --git a/queue-6.1/series b/queue-6.1/series index 64540815b6..f4af24f3e2 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -113,3 +113,30 @@ rdma-mlx5-fix-ah-static-rate-parsing.patch scsi-core-clear-driver-private-data-when-retrying-re.patch rdma-mlx5-fix-bind-qp-error-cleanup-flow.patch sunrpc-suppress-warnings-for-unused-procfs-functions.patch +alsa-usb-audio-avoid-dropping-midi-events-at-closing.patch +bluetooth-l2cap-fix-l2cap_ecred_conn_rsp-response.patch +afs-remove-variable-nr_servers.patch +afs-make-it-possible-to-find-the-volumes-that-are-us.patch +afs-fix-the-server_list-to-unuse-a-displaced-server-.patch +net-loopback-avoid-sending-ip-packets-without-an-eth.patch 
+net-set-the-minimum-for-net_hotdata.netdev_budget_us.patch +net-ipv4-add-tracepoint-for-icmp_send.patch +ipv4-icmp-pass-full-ds-field-to-ip_route_input.patch +ipv4-icmp-unmask-upper-dscp-bits-in-icmp_route_looku.patch +ipvlan-unmask-upper-dscp-bits-in-ipvlan_process_v4_o.patch +ipv4-convert-icmp_route_lookup-to-dscp_t.patch +ipv4-convert-ip_route_input-to-dscp_t.patch +ipvlan-prepare-ipvlan_process_v4_outbound-to-future-.patch +ipvlan-ensure-network-headers-are-in-skb-linear-part.patch +net-cadence-macb-synchronize-stats-calculations.patch +asoc-es8328-fix-route-from-dac-to-output.patch +ipvs-always-clear-ipvs_property-flag-in-skb_scrub_pa.patch +tcp-defer-ts_recent-changes-until-req-is-owned.patch +net-clear-old-fragment-checksum-value-in-napi_reuse_.patch +net-mvpp2-cls-fixed-non-ip-flow-with-vlan-tag-flow-d.patch +net-mlx5-irq-fix-null-string-in-debug-print.patch +include-net-add-static-inline-dst_dev_overhead-to-ds.patch +net-ipv6-seg6_iptunnel-mitigate-2-realloc-issue.patch +net-ipv6-fix-dst-ref-loop-on-input-in-seg6-lwt.patch +net-ipv6-rpl_iptunnel-mitigate-2-realloc-issue.patch +net-ipv6-fix-dst-ref-loop-on-input-in-rpl-lwt.patch diff --git a/queue-6.1/tcp-defer-ts_recent-changes-until-req-is-owned.patch b/queue-6.1/tcp-defer-ts_recent-changes-until-req-is-owned.patch new file mode 100644 index 0000000000..16812533e8 --- /dev/null +++ b/queue-6.1/tcp-defer-ts_recent-changes-until-req-is-owned.patch @@ -0,0 +1,92 @@ +From 5fc9d620bffc5b5e9d907f4afc4987ef50ddef8e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Feb 2025 17:00:47 +0800 +Subject: tcp: Defer ts_recent changes until req is owned + +From: Wang Hai + +[ Upstream commit 8d52da23b6c68a0f6bad83959ebb61a2cf623c4e ] + +Recently a bug was discovered where the server had entered TCP_ESTABLISHED +state, but the upper layers were not notified. 
+ +The same 5-tuple packet may be processed by different CPUs, so two +CPUs may receive different ack packets at the same time when the +state is TCP_NEW_SYN_RECV. + +In that case, req->ts_recent in tcp_check_req may be changed concurrently, +which will probably cause the newsk's ts_recent to be incorrectly large. +As a result, tcp_validate_incoming will fail. At this point, newsk will not be +able to enter the TCP_ESTABLISHED state. + +cpu1 cpu2 +tcp_check_req + tcp_check_req + req->ts_recent = rcv_tsval = t1 + req->ts_recent = rcv_tsval = t2 + + syn_recv_sock + tcp_sk(child)->rx_opt.ts_recent = req->ts_recent = t2 // t1 < t2 +tcp_child_process + tcp_rcv_state_process + tcp_validate_incoming + tcp_paws_check + if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win) + // t2 - t1 > paws_win, failed + tcp_v4_do_rcv + tcp_rcv_state_process + // TCP_ESTABLISHED + +The cpu2's skb or a newly received skb will call tcp_v4_do_rcv to get +the newsk into the TCP_ESTABLISHED state, but at this point it is no +longer possible to notify the upper layer application. A notification +mechanism could be added here, but the fix is more complex, so the +current fix is used. + +In tcp_check_req, req->ts_recent is used to assign a value to +tcp_sk(child)->rx_opt.ts_recent, so removing the change in req->ts_recent +and changing tcp_sk(child)->rx_opt.ts_recent directly after owning the +req fixes this bug. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Wang Hai +Reviewed-by: Jason Xing +Reviewed-by: Eric Dumazet +Reviewed-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_minisocks.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c +index c562cb965e742..bc94df0140bfd 100644 +--- a/net/ipv4/tcp_minisocks.c ++++ b/net/ipv4/tcp_minisocks.c +@@ -735,12 +735,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, + + /* In sequence, PAWS is OK. */ + +- /* TODO: We probably should defer ts_recent change once +- * we take ownership of @req. +- */ +- if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) +- WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval); +- + if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { + /* Truncate SYN, it is out of window starting + at tcp_rsk(req)->rcv_isn + 1. */ +@@ -789,6 +783,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, + if (!child) + goto listen_overflow; + ++ if (own_req && tmp_opt.saw_tstamp && ++ !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) ++ tcp_sk(child)->rx_opt.ts_recent = tmp_opt.rcv_tsval; ++ + if (own_req && rsk_drop_req(req)) { + reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req); + inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); +-- +2.39.5 +