From: Benjamin Berg Date: Thu, 7 Aug 2025 11:25:57 +0000 (+0200) Subject: nl80211: Delay event processing during command handling X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=f5790e97cd64;p=thirdparty%2Fhostap.git nl80211: Delay event processing during command handling Unrelated nl80211 events may arrive while the driver is waiting for the confirmation of another command. These events must not be delivered immediately as they may confuse the internal state machine. They also must be delivered, but some commands would cause them to be dropped. Fix this up by queuing all events for later processing. Note that this code is not very elegant as libnl does not export the nl_cb_call() function. Add a hook into the two relevant functions that process events. This hook will forward command replies to the correct handler and queue the event if they should not be processed immediately. Note that in a lot of cases this cannot happen because different nl80211 sockets are used for different purposes. However, the EAPOL frames specifically have to be delivered over the same socket that all connection related commands are done. So for these notifications the race condition can happen and could cause a state confusion in wpa_supplicant. An example of this happening was observed in the autogo_pbc test where wpa_supplicant would initiate a deauth and during that time also handle an EAPOL frame that itself caused another deauthentication. This resulted in a double free of wpa_s->current_ssid. Signed-off-by: Benjamin Berg --- diff --git a/src/drivers/driver_nl80211.c b/src/drivers/driver_nl80211.c index 6e46b67b9..f31c54dc4 100644 --- a/src/drivers/driver_nl80211.c +++ b/src/drivers/driver_nl80211.c @@ -501,6 +501,111 @@ out: } +struct nl80211_pending_event { + struct dl_list list; + + struct nl_msg *msg; + int (*handler)(struct nl_msg *, void *); + void *data; +}; + + +static void nl80211_remove_pending_event(struct nl80211_pending_event *event) +{ + dl_list_del(&event->list); + nlmsg_free(event->msg); + os_free(event); +} + + +static void nl80211_deliver_pending_events(void *eloop_ctx, void *data) +{ + struct nl80211_global *global = eloop_ctx; + struct dl_list pending_events; + + wpa_printf(MSG_DEBUG, "nl80211: Delivering pending events"); + + global->pending_events.next->prev = &pending_events; + global->pending_events.prev->next = &pending_events; + pending_events.next = global->pending_events.next; + pending_events.prev = global->pending_events.prev; + dl_list_init(&global->pending_events); + + while (!dl_list_empty(&pending_events)) { + struct nl80211_pending_event *event = + dl_list_first(&pending_events, + struct nl80211_pending_event, list); + + event->handler(event->msg, event->data); + nl80211_remove_pending_event(event); + } +} + + +/* Unfortunately, libnl does not permit us to call an existing handler. If we + * could either fetch the information or if nl_cb_call was exposed, this would + * be much more elegant. Unfortunately it is not, so we need a hook in every + * event handler that might need to be overriden. + */ +int nl80211_reply_hook(struct nl80211_global *global, struct nl_msg *msg, + int (*delayed_handler)(struct nl_msg *, void *), + void *delayed_data) +{ + struct nlmsghdr *hdr = nlmsg_hdr(msg); + struct nl80211_pending_event *event; + + if (!global->sync_reply_handling) { + /* We cannot rely on eloop handling the timeout before trying + * to read the socket again (it should usually, but it may not + * if multiple timeouts have expired). As such, check here if + * there are already pending events, and if there are, queue + * this one. + */ + if (!dl_list_empty(&global->pending_events)) + goto queue_event; + + return NL_OK; + } + + /* This may be a reply if the port ID is nonzero (broadcast). In that + * case the sequence number should also match. We will treat everything + * else as an event and queue it for later processing. + */ + if (hdr->nlmsg_pid && hdr->nlmsg_seq == global->reply_seq) { + if (global->reply_handler) + global->reply_handler(msg, global->reply_data); + + return NL_SKIP; + } + + /* Schedule a timeout to deliver the events from the eventloop. */ + if (dl_list_empty(&global->pending_events)) { + if (eloop_register_timeout(0, 0, nl80211_deliver_pending_events, + global, NULL) < 0) + wpa_printf(MSG_ERROR, "%s: failed to register timeout", + __func__); + } + +queue_event: + TEST_FAIL_TAG("queued"); + + event = os_zalloc(sizeof(*event)); + if (!event) { + wpa_printf(MSG_ERROR, + "%s: failed to allocate delayed event context", + __func__); + return NL_SKIP; + } + + event->handler = delayed_handler; + event->data = delayed_data; + nlmsg_get(msg); + event->msg = msg; + dl_list_add_tail(&global->pending_events, &event->list); + + return NL_SKIP; +} + int send_and_recv_glb(struct nl80211_global *global, struct wpa_driver_nl80211_data *drv, /* may be NULL */ struct nl_sock *nl_handle, struct nl_msg *msg, @@ -541,6 +646,9 @@ int send_and_recv_glb(struct nl80211_global *global, "nl80211: setsockopt(NETLINK_CAP_ACK) failed: %s (ignored)", strerror(errno)); + if (TEST_FAIL_TAG("pre-send-sleep")) + os_sleep(1, 0); + err.err = nl_send_auto_complete(nl_handle, msg); if (err.err < 0) { wpa_printf(MSG_INFO, @@ -567,12 +675,13 @@ int send_and_recv_glb(struct nl80211_global *global, nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler_custom, ack_data); } else { - nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler, &err); + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler, &err.err); } - if (valid_handler) - nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, - valid_handler, valid_data); + global->sync_reply_handling = true; + global->reply_seq = nlmsg_hdr(msg)->nlmsg_seq; + global->reply_handler = valid_handler; + global->reply_data = valid_data; while (err.err > 0) { int res = nl_recvmsgs(nl_handle, cb); @@ -596,6 +705,12 @@ int send_and_recv_glb(struct nl80211_global *global, __func__, res, nl_geterror(res)); } } + + global->sync_reply_handling = false; + global->reply_seq = 0; + global->reply_handler = NULL; + global->reply_data = NULL; + out: nl_cb_put(cb); /* Always clear the message as it can potentially contain keys */ @@ -2063,6 +2178,14 @@ static int wpa_driver_nl80211_init_nl_global(struct nl80211_global *global) if (global->nl_event == NULL) goto err; + /* Needs to be registered early so that process_global_event() calls + * the sync reply handler hook. + */ + nl_cb_set(global->nl_cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, + no_seq_check, NULL); + nl_cb_set(global->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, + process_global_event, global); + ret = nl_get_multicast_id(global, "nl80211", "scan"); if (ret >= 0) ret = nl_socket_add_membership(global->nl_event, ret); @@ -2126,11 +2249,6 @@ static int wpa_driver_nl80211_init_nl_global(struct nl80211_global *global) genl_family_put(family); nl_cache_free(cache); - nl_cb_set(global->nl_cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, - no_seq_check, NULL); - nl_cb_set(global->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, - process_global_event, global); - nl80211_register_eloop_read(&global->nl_event, wpa_driver_nl80211_event_receive, global->nl_cb, 0); @@ -2309,11 +2427,21 @@ static int nl80211_init_bss(struct i802_bss *bss) static void nl80211_destroy_bss(struct i802_bss *bss) { + struct nl80211_pending_event *event, *next; + nl_cb_put(bss->nl_cb); bss->nl_cb = NULL; if (bss->nl_connect) nl80211_destroy_eloop_handle(&bss->nl_connect, 1); + + dl_list_for_each_safe(event, next, &bss->drv->global->pending_events, + struct nl80211_pending_event, list) { + if (event->data != bss) + continue; + + nl80211_remove_pending_event(event); + } } @@ -10271,6 +10399,7 @@ static void * nl80211_global_init(void *ctx) global->ctx = ctx; global->ioctl_sock = -1; dl_list_init(&global->interfaces); + dl_list_init(&global->pending_events); global->if_add_ifindex = -1; cfg = os_zalloc(sizeof(*cfg)); @@ -10315,6 +10444,16 @@ static void nl80211_global_deinit(void *priv) dl_list_len(&global->interfaces)); } + eloop_cancel_timeout(nl80211_deliver_pending_events, global, NULL); + + while (!dl_list_empty(&global->pending_events)) { + struct nl80211_pending_event *event = + dl_list_first(&global->pending_events, + struct nl80211_pending_event, list); + + nl80211_remove_pending_event(event); + } + if (global->netlink) netlink_deinit(global->netlink); diff --git a/src/drivers/driver_nl80211.h b/src/drivers/driver_nl80211.h index 2ef326c93..3d5eb493d 100644 --- a/src/drivers/driver_nl80211.h +++ b/src/drivers/driver_nl80211.h @@ -22,6 +22,8 @@ nla_nest_start(msg, NLA_F_NESTED | (attrtype)) #endif +struct nl_msg; + struct nl80211_global { void *ctx; struct dl_list interfaces; @@ -37,6 +39,15 @@ struct nl80211_global { int ioctl_sock; /* socket for ioctl() use */ struct nl_sock *nl_event; + + /* Handling of sync replies */ + bool sync_reply_handling; + u32 reply_seq; + int (*reply_handler)(struct nl_msg *, void *); + void *reply_data; + + /* pending events that happened while waiting for a sync reply */ + struct dl_list pending_events; }; struct nl80211_wiphy_data { @@ -268,8 +279,6 @@ struct wpa_driver_nl80211_data { #endif /* CONFIG_DRIVER_NL80211_QCA */ }; -struct nl_msg; - struct nl80211_err_info { int link_id; }; @@ -281,6 +290,10 @@ struct nl_msg * nl80211_drv_msg(struct wpa_driver_nl80211_data *drv, int flags, uint8_t cmd); struct nl_msg * nl80211_bss_msg(struct i802_bss *bss, int flags, uint8_t cmd); +int nl80211_reply_hook(struct nl80211_global *global, struct nl_msg *msg, + int (*delayed_handler)(struct nl_msg *, void *), + void *delayed_data); + int send_and_recv_glb(struct nl80211_global *global, struct wpa_driver_nl80211_data *drv, /* may be NULL */ struct nl_sock *nl_handle, struct nl_msg *msg, diff --git a/src/drivers/driver_nl80211_event.c b/src/drivers/driver_nl80211_event.c index 7a54ff21e..f6eec16c9 100644 --- a/src/drivers/driver_nl80211_event.c +++ b/src/drivers/driver_nl80211_event.c @@ -4438,6 +4438,11 @@ int process_global_event(struct nl_msg *msg, void *arg) int wdev_id_set = 0; int wiphy_idx_set = 0; bool processed = false; + int res; + + res = nl80211_reply_hook(global, msg, process_global_event, arg); + if (res != NL_OK) + return res; /* Event marker, all prior events have been processed */ if (gnlh->cmd == NL80211_CMD_GET_PROTOCOL_FEATURES) { @@ -4530,6 +4535,12 @@ int process_bss_event(struct nl_msg *msg, void *arg) struct i802_bss *bss = arg; struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); struct nlattr *tb[NL80211_ATTR_MAX + 1]; + int res; + + res = nl80211_reply_hook(bss->drv->global, msg, + process_bss_event, arg); + if (res != NL_OK) + return res; nla_parse(tb, NL80211_ATTR_MAX, genlmsg_attrdata(gnlh, 0), genlmsg_attrlen(gnlh, 0), NULL); diff --git a/tests/hwsim/test_cfg80211.py b/tests/hwsim/test_cfg80211.py index 835f55cd6..ea5911cac 100644 --- a/tests/hwsim/test_cfg80211.py +++ b/tests/hwsim/test_cfg80211.py @@ -9,6 +9,7 @@ import logging logger = logging.getLogger() import binascii import os +import threading import time import hostapd @@ -150,3 +151,54 @@ def test_cfg80211_hostapd_ext_sta_remove(dev, apdev): # further action happens here. If that event were to be used to remove the # STA entry from hostapd even in device_ap_sme == 0 case, this test case # could be extended to cover additional operations. + +def test_nl80211_event_during_cmd(dev, apdev): + """nl80211 event queueing during command handling""" + + # Test an event arriving while processing send_and_recv works fine, + # i.e., it is pushed to pending_events and processed from a timer after + # completion. + # Note that we can only trigger this when the event is on the same socket + # that we are sending the command on. This is primarily the case for the + # nl_connect socket. We trigger the case here by sending an EAPOL frame + # while a DEAUTH command is queued up already. + params = hostapd.wpa2_params(ssid='test', passphrase='test1234', + wpa_key_mgmt="WPA-PSK") + hapd = hostapd.add_ap(apdev[0], params) + + dev[0].connect('test', psk='test1234', key_mgmt="WPA-PSK") + ev = hapd.wait_event(["AP-STA-CONNECTED"], timeout=5) + if ev is None: + raise Exception("No connection event received from hostapd") + + dev[0].dump_monitor() + + # This inserts a 1 second sleep in front of the next nl80211 request. Next + # the "queued" fail test checks that an event was queued (i.e., the EAPOL + # frame). + with fail_test(dev[0], + 1, "pre-send-sleep;send_and_recv", + 1, "queued;nl80211_reply_hook"): + + def disconnect_thread_func(): + assert 'OK' in dev[0].request('DISCONNECT'), 'Failed to disconnect' + + disconnect_thread = threading.Thread(target=disconnect_thread_func) + disconnect_thread.start() + + # Wait a little bit to ensure wpa_supplicant is in the pre-send-sleep + time.sleep(0.2) + + # Trigger an EAPOL frame from hapd + hapd.request('REKEY_GTK') + + disconnect_thread.join() + + # Finally verify that the EAPOL frame was processed + ev = dev[0].wait_event(['EAPOL-RX'], timeout=1) + if ev is None: + raise Exception("EAPOL event was not processed properly") + + ev = hapd.wait_event(["AP-STA-DISCONNECTED"], timeout=5) + if ev is None: + raise Exception("No disconnection event received from hostapd")