]> git.ipfire.org Git - thirdparty/hostap.git/commitdiff
nl80211: Delay event processing during command handling
authorBenjamin Berg <benjamin.berg@intel.com>
Thu, 7 Aug 2025 11:25:57 +0000 (13:25 +0200)
committerJouni Malinen <j@w1.fi>
Mon, 6 Oct 2025 20:40:05 +0000 (23:40 +0300)
Unrelated nl80211 events may arrive while the driver is waiting for the
confirmation of another command. These events must not be delivered
immediately as they may confuse the internal state machine. They also
must be delivered, but some commands would cause them to be dropped.

Fix this up by queuing all events for later processing. Note that this
code is not very elegant as libnl does not export the nl_cb_call()
function. Add a hook into the two relevant functions that process
events. This hook will forward command replies to the correct handler
and queue the event if they should not be processed immediately.

Note that in a lot of cases this cannot happen because different nl80211
sockets are used for different purposes. However, the EAPOL frames
specifically have to be delivered over the same socket that all
connection related commands are done. So for these notifications the
race condition can happen and could cause a state confusion in
wpa_supplicant.

An example of this happening was observed in the autogo_pbc test where
wpa_supplicant would initiate a deauth and during that time also handle
an EAPOL frame that itself caused another deauthentication. This
resulted in a double free of wpa_s->current_ssid.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
src/drivers/driver_nl80211.c
src/drivers/driver_nl80211.h
src/drivers/driver_nl80211_event.c
tests/hwsim/test_cfg80211.py

index 6e46b67b993403cac9ca624e130ee7f9af26ca0b..f31c54dc40d739ac115e66d3ca5196440a3e98e7 100644 (file)
@@ -501,6 +501,111 @@ out:
 }
 
 
+struct nl80211_pending_event {
+       struct dl_list list;
+
+       struct nl_msg *msg;
+       int (*handler)(struct nl_msg *, void *);
+       void *data;
+};
+
+
+static void nl80211_remove_pending_event(struct nl80211_pending_event *event)
+{
+       dl_list_del(&event->list);
+       nlmsg_free(event->msg);
+       os_free(event);
+}
+
+
+static void nl80211_deliver_pending_events(void *eloop_ctx, void *data)
+{
+       struct nl80211_global *global = eloop_ctx;
+       struct dl_list pending_events;
+
+       wpa_printf(MSG_DEBUG, "nl80211: Delivering pending events");
+
+       global->pending_events.next->prev = &pending_events;
+       global->pending_events.prev->next = &pending_events;
+       pending_events.next = global->pending_events.next;
+       pending_events.prev = global->pending_events.prev;
+       dl_list_init(&global->pending_events);
+
+       while (!dl_list_empty(&pending_events)) {
+               struct nl80211_pending_event *event =
+                       dl_list_first(&pending_events,
+                                     struct nl80211_pending_event, list);
+
+               event->handler(event->msg, event->data);
+               nl80211_remove_pending_event(event);
+       }
+}
+
+
+/* Unfortunately, libnl does not permit us to call an existing handler. If we
+ * could either fetch the information or if nl_cb_call was exposed, this would
+ * be much more elegant. Unfortunately it is not, so we need a hook in every
+ * event handler that might need to be overriden.
+ */
+int nl80211_reply_hook(struct nl80211_global *global, struct nl_msg *msg,
+                      int (*delayed_handler)(struct nl_msg *, void *),
+                      void *delayed_data)
+{
+       struct nlmsghdr *hdr = nlmsg_hdr(msg);
+       struct nl80211_pending_event *event;
+
+       if (!global->sync_reply_handling) {
+               /* We cannot rely on eloop handling the timeout before trying
+                * to read the socket again (it should usually, but it may not
+                * if multiple timeouts have expired). As such, check here if
+                * there are already pending events, and if there are, queue
+                * this one.
+                */
+               if (!dl_list_empty(&global->pending_events))
+                       goto queue_event;
+
+               return NL_OK;
+       }
+
+       /* This may be a reply if the port ID is nonzero (broadcast). In that
+        * case the sequence number should also match. We will treat everything
+        * else as an event and queue it for later processing.
+        */
+       if (hdr->nlmsg_pid && hdr->nlmsg_seq == global->reply_seq) {
+               if (global->reply_handler)
+                       global->reply_handler(msg, global->reply_data);
+
+               return NL_SKIP;
+       }
+
+       /* Schedule a timeout to deliver the events from the eventloop. */
+       if (dl_list_empty(&global->pending_events)) {
+               if (eloop_register_timeout(0, 0, nl80211_deliver_pending_events,
+                                          global, NULL) < 0)
+                       wpa_printf(MSG_ERROR, "%s: failed to register timeout",
+                                  __func__);
+       }
+
+queue_event:
+       TEST_FAIL_TAG("queued");
+
+       event = os_zalloc(sizeof(*event));
+       if (!event) {
+               wpa_printf(MSG_ERROR,
+                          "%s: failed to allocate delayed event context",
+                          __func__);
+               return NL_SKIP;
+       }
+
+       event->handler = delayed_handler;
+       event->data = delayed_data;
+       nlmsg_get(msg);
+       event->msg = msg;
+       dl_list_add_tail(&global->pending_events, &event->list);
+
+       return NL_SKIP;
+}
+
 int send_and_recv_glb(struct nl80211_global *global,
                      struct wpa_driver_nl80211_data *drv, /* may be NULL */
                      struct nl_sock *nl_handle, struct nl_msg *msg,
@@ -541,6 +646,9 @@ int send_and_recv_glb(struct nl80211_global *global,
                           "nl80211: setsockopt(NETLINK_CAP_ACK) failed: %s (ignored)",
                           strerror(errno));
 
+       if (TEST_FAIL_TAG("pre-send-sleep"))
+               os_sleep(1, 0);
+
        err.err = nl_send_auto_complete(nl_handle, msg);
        if (err.err < 0) {
                wpa_printf(MSG_INFO,
@@ -567,12 +675,13 @@ int send_and_recv_glb(struct nl80211_global *global,
                nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM,
                          ack_handler_custom, ack_data);
        } else {
-               nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler, &err);
+               nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler, &err.err);
        }
 
-       if (valid_handler)
-               nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM,
-                         valid_handler, valid_data);
+       global->sync_reply_handling = true;
+       global->reply_seq = nlmsg_hdr(msg)->nlmsg_seq;
+       global->reply_handler = valid_handler;
+       global->reply_data = valid_data;
 
        while (err.err > 0) {
                int res = nl_recvmsgs(nl_handle, cb);
@@ -596,6 +705,12 @@ int send_and_recv_glb(struct nl80211_global *global,
                                   __func__, res, nl_geterror(res));
                }
        }
+
+       global->sync_reply_handling = false;
+       global->reply_seq = 0;
+       global->reply_handler = NULL;
+       global->reply_data = NULL;
+
  out:
        nl_cb_put(cb);
        /* Always clear the message as it can potentially contain keys */
@@ -2063,6 +2178,14 @@ static int wpa_driver_nl80211_init_nl_global(struct nl80211_global *global)
        if (global->nl_event == NULL)
                goto err;
 
+       /* Needs to be registered early so that process_global_event() calls
+        * the sync reply handler hook.
+        */
+       nl_cb_set(global->nl_cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM,
+                 no_seq_check, NULL);
+       nl_cb_set(global->nl_cb, NL_CB_VALID, NL_CB_CUSTOM,
+                 process_global_event, global);
+
        ret = nl_get_multicast_id(global, "nl80211", "scan");
        if (ret >= 0)
                ret = nl_socket_add_membership(global->nl_event, ret);
@@ -2126,11 +2249,6 @@ static int wpa_driver_nl80211_init_nl_global(struct nl80211_global *global)
        genl_family_put(family);
        nl_cache_free(cache);
 
-       nl_cb_set(global->nl_cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM,
-                 no_seq_check, NULL);
-       nl_cb_set(global->nl_cb, NL_CB_VALID, NL_CB_CUSTOM,
-                 process_global_event, global);
-
        nl80211_register_eloop_read(&global->nl_event,
                                    wpa_driver_nl80211_event_receive,
                                    global->nl_cb, 0);
@@ -2309,11 +2427,21 @@ static int nl80211_init_bss(struct i802_bss *bss)
 
 static void nl80211_destroy_bss(struct i802_bss *bss)
 {
+       struct nl80211_pending_event *event, *next;
+
        nl_cb_put(bss->nl_cb);
        bss->nl_cb = NULL;
 
        if (bss->nl_connect)
                nl80211_destroy_eloop_handle(&bss->nl_connect, 1);
+
+       dl_list_for_each_safe(event, next, &bss->drv->global->pending_events,
+                             struct nl80211_pending_event, list) {
+               if (event->data != bss)
+                       continue;
+
+               nl80211_remove_pending_event(event);
+       }
 }
 
 
@@ -10271,6 +10399,7 @@ static void * nl80211_global_init(void *ctx)
        global->ctx = ctx;
        global->ioctl_sock = -1;
        dl_list_init(&global->interfaces);
+       dl_list_init(&global->pending_events);
        global->if_add_ifindex = -1;
 
        cfg = os_zalloc(sizeof(*cfg));
@@ -10315,6 +10444,16 @@ static void nl80211_global_deinit(void *priv)
                           dl_list_len(&global->interfaces));
        }
 
+       eloop_cancel_timeout(nl80211_deliver_pending_events, global, NULL);
+
+       while (!dl_list_empty(&global->pending_events)) {
+               struct nl80211_pending_event *event =
+                       dl_list_first(&global->pending_events,
+                                     struct nl80211_pending_event, list);
+
+               nl80211_remove_pending_event(event);
+       }
+
        if (global->netlink)
                netlink_deinit(global->netlink);
 
index 2ef326c9384b0896fb855525c7c50100e56120a7..3d5eb493d68b730ac3a3ec86806447b6ad6bb389 100644 (file)
@@ -22,6 +22,8 @@
        nla_nest_start(msg, NLA_F_NESTED | (attrtype))
 #endif
 
+struct nl_msg;
+
 struct nl80211_global {
        void *ctx;
        struct dl_list interfaces;
@@ -37,6 +39,15 @@ struct nl80211_global {
        int ioctl_sock; /* socket for ioctl() use */
 
        struct nl_sock *nl_event;
+
+       /* Handling of sync replies */
+       bool sync_reply_handling;
+       u32 reply_seq;
+       int (*reply_handler)(struct nl_msg *, void *);
+       void *reply_data;
+
+       /* pending events that happened while waiting for a sync reply */
+       struct dl_list pending_events;
 };
 
 struct nl80211_wiphy_data {
@@ -268,8 +279,6 @@ struct wpa_driver_nl80211_data {
 #endif /* CONFIG_DRIVER_NL80211_QCA */
 };
 
-struct nl_msg;
-
 struct nl80211_err_info {
        int link_id;
 };
@@ -281,6 +290,10 @@ struct nl_msg * nl80211_drv_msg(struct wpa_driver_nl80211_data *drv, int flags,
                                uint8_t cmd);
 struct nl_msg * nl80211_bss_msg(struct i802_bss *bss, int flags, uint8_t cmd);
 
+int nl80211_reply_hook(struct nl80211_global *global, struct nl_msg *msg,
+                      int (*delayed_handler)(struct nl_msg *, void *),
+                      void *delayed_data);
+
 int send_and_recv_glb(struct nl80211_global *global,
                      struct wpa_driver_nl80211_data *drv, /* may be NULL */
                      struct nl_sock *nl_handle, struct nl_msg *msg,
index 7a54ff21e1d9ffa047e648527ce692cc6fe74451..f6eec16c981c33079ec75e52debca6ad4b712aa8 100644 (file)
@@ -4438,6 +4438,11 @@ int process_global_event(struct nl_msg *msg, void *arg)
        int wdev_id_set = 0;
        int wiphy_idx_set = 0;
        bool processed = false;
+       int res;
+
+       res = nl80211_reply_hook(global, msg, process_global_event, arg);
+       if (res != NL_OK)
+               return res;
 
        /* Event marker, all prior events have been processed */
        if (gnlh->cmd == NL80211_CMD_GET_PROTOCOL_FEATURES) {
@@ -4530,6 +4535,12 @@ int process_bss_event(struct nl_msg *msg, void *arg)
        struct i802_bss *bss = arg;
        struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
        struct nlattr *tb[NL80211_ATTR_MAX + 1];
+       int res;
+
+       res = nl80211_reply_hook(bss->drv->global, msg,
+                                process_bss_event, arg);
+       if (res != NL_OK)
+               return res;
 
        nla_parse(tb, NL80211_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
                  genlmsg_attrlen(gnlh, 0), NULL);
index 835f55cd6926c1a489e5b263cce38808ba5638ab..ea5911cac3aecb7594ef3c3065733da200968927 100644 (file)
@@ -9,6 +9,7 @@ import logging
 logger = logging.getLogger()
 import binascii
 import os
+import threading
 import time
 
 import hostapd
@@ -150,3 +151,54 @@ def test_cfg80211_hostapd_ext_sta_remove(dev, apdev):
     # further action happens here. If that event were to be used to remove the
     # STA entry from hostapd even in device_ap_sme == 0 case, this test case
     # could be extended to cover additional operations.
+
+def test_nl80211_event_during_cmd(dev, apdev):
+    """nl80211 event queueing during command handling"""
+
+    # Test an event arriving while processing send_and_recv works fine,
+    # i.e., it is pushed to pending_events and processed from a timer after
+    # completion.
+    # Note that we can only trigger this when the event is on the same socket
+    # that we are sending the command on. This is primarily the case for the
+    # nl_connect socket. We trigger the case here by sending an EAPOL frame
+    # while a DEAUTH command is queued up already.
+    params = hostapd.wpa2_params(ssid='test', passphrase='test1234',
+                                 wpa_key_mgmt="WPA-PSK")
+    hapd = hostapd.add_ap(apdev[0], params)
+
+    dev[0].connect('test', psk='test1234', key_mgmt="WPA-PSK")
+    ev = hapd.wait_event(["AP-STA-CONNECTED"], timeout=5)
+    if ev is None:
+        raise Exception("No connection event received from hostapd")
+
+    dev[0].dump_monitor()
+
+    # This inserts a 1 second sleep in front of the next nl80211 request. Next
+    # the "queued" fail test checks that an event was queued (i.e., the EAPOL
+    # frame).
+    with fail_test(dev[0],
+                   1, "pre-send-sleep;send_and_recv",
+                   1, "queued;nl80211_reply_hook"):
+
+        def disconnect_thread_func():
+            assert 'OK' in dev[0].request('DISCONNECT'), 'Failed to disconnect'
+
+        disconnect_thread = threading.Thread(target=disconnect_thread_func)
+        disconnect_thread.start()
+
+        # Wait a little bit to ensure wpa_supplicant is in the pre-send-sleep
+        time.sleep(0.2)
+
+        # Trigger an EAPOL frame from hapd
+        hapd.request('REKEY_GTK')
+
+        disconnect_thread.join()
+
+        # Finally verify that the EAPOL frame was processed
+        ev = dev[0].wait_event(['EAPOL-RX'], timeout=1)
+        if ev is None:
+            raise Exception("EAPOL event was not processed properly")
+
+    ev = hapd.wait_event(["AP-STA-DISCONNECTED"], timeout=5)
+    if ev is None:
+        raise Exception("No disconnection event received from hostapd")