]>
Commit | Line | Data |
---|---|---|
69a513a2 GKH |
1 | From 816e846c2eb9129a3e0afa5f920c8bbc71efecaa Mon Sep 17 00:00:00 2001 |
2 | From: Aaron Knister <aaron.s.knister@nasa.gov> | |
3 | Date: Fri, 24 Aug 2018 08:42:46 -0400 | |
4 | Subject: IB/ipoib: Avoid a race condition between start_xmit and cm_rep_handler | |
5 | ||
6 | From: Aaron Knister <aaron.s.knister@nasa.gov> | |
7 | ||
8 | commit 816e846c2eb9129a3e0afa5f920c8bbc71efecaa upstream. | |
9 | ||
10 | Inside of start_xmit() the call to check if the connection is up and the | |
11 | queueing of the packets for later transmission is not atomic which leaves | |
12 | a window where cm_rep_handler can run, set the connection up, dequeue | |
13 | pending packets and leave the subsequently queued packets by start_xmit() | |
14 | sitting on neigh->queue until they're dropped when the connection is torn | |
15 | down. This only applies to connected mode. These dropped packets can | |
16 | really upset TCP, for example, and cause multi-minute delays in | |
17 | transmission for open connections. | |
18 | ||
19 | Here's the code in start_xmit where we check to see if the connection is | |
20 | up: | |
21 | ||
22 | if (ipoib_cm_get(neigh)) { | |
23 | if (ipoib_cm_up(neigh)) { | |
24 | ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); | |
25 | goto unref; | |
26 | } | |
27 | } | |
28 | ||
29 | The race occurs if cm_rep_handler execution occurs after the above | |
30 | connection check (specifically if it gets to the point where it acquires | |
31 | priv->lock to dequeue pending skb's) but before the below code snippet in | |
32 | start_xmit where packets are queued. | |
33 | ||
34 | if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { | |
35 | push_pseudo_header(skb, phdr->hwaddr); | |
36 | spin_lock_irqsave(&priv->lock, flags); | |
37 | __skb_queue_tail(&neigh->queue, skb); | |
38 | spin_unlock_irqrestore(&priv->lock, flags); | |
39 | } else { | |
40 | ++dev->stats.tx_dropped; | |
41 | dev_kfree_skb_any(skb); | |
42 | } | |
43 | ||
44 | The patch acquires the netif tx lock in cm_rep_handler for the section | |
45 | where it sets the connection up and dequeues and retransmits deferred | |
46 | skb's. | |
47 | ||
48 | Fixes: 839fcaba355a ("IPoIB: Connected mode experimental support") | |
49 | Cc: stable@vger.kernel.org | |
50 | Signed-off-by: Aaron Knister <aaron.s.knister@nasa.gov> | |
51 | Tested-by: Ira Weiny <ira.weiny@intel.com> | |
52 | Reviewed-by: Ira Weiny <ira.weiny@intel.com> | |
53 | Signed-off-by: Jason Gunthorpe <jgg@mellanox.com> | |
54 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
55 | ||
56 | --- | |
57 | drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 ++ | |
58 | 1 file changed, 2 insertions(+) | |
59 | ||
60 | --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c | |
61 | +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c | |
62 | @@ -1009,12 +1009,14 @@ static int ipoib_cm_rep_handler(struct i | |
63 | ||
64 | skb_queue_head_init(&skqueue); | |
65 | ||
66 | + netif_tx_lock_bh(p->dev); | |
67 | spin_lock_irq(&priv->lock); | |
68 | set_bit(IPOIB_FLAG_OPER_UP, &p->flags); | |
69 | if (p->neigh) | |
70 | while ((skb = __skb_dequeue(&p->neigh->queue))) | |
71 | __skb_queue_tail(&skqueue, skb); | |
72 | spin_unlock_irq(&priv->lock); | |
73 | + netif_tx_unlock_bh(p->dev); | |
74 | ||
75 | while ((skb = __skb_dequeue(&skqueue))) { | |
76 | skb->dev = p->dev; |