From: Greg Kroah-Hartman Date: Mon, 29 Nov 2021 18:13:08 +0000 (+0100) Subject: 4.4-stable patches X-Git-Tag: v5.15.6~7 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ef5cb2170c2ed7f96f1b003d2babb72bba78e87b;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: tty-hvc-replace-bug_on-with-negative-return-value.patch xen-blkfront-don-t-take-local-copy-of-a-request-from-the-ring-page.patch xen-blkfront-don-t-trust-the-backend-response-data-blindly.patch xen-blkfront-read-response-from-backend-only-once.patch xen-netfront-disentangle-tx_skb_freelist.patch xen-netfront-don-t-read-data-from-request-on-the-ring-page.patch xen-netfront-don-t-trust-the-backend-response-data-blindly.patch xen-netfront-read-response-from-backend-only-once.patch xen-sync-include-xen-interface-io-ring.h-with-xen-s-newest-version.patch --- diff --git a/queue-4.4/series b/queue-4.4/series index c7f9b7b8f56..a7828cb1759 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -20,3 +20,12 @@ hugetlbfs-flush-tlbs-correctly-after-huge_pmd_unshare.patch proc-vmcore-fix-clearing-user-buffer-by-properly-using-clear_user.patch nfc-add-nci_unreg-flag-to-eliminate-the-race.patch fuse-release-pipe-buf-after-last-use.patch +xen-sync-include-xen-interface-io-ring.h-with-xen-s-newest-version.patch +xen-blkfront-read-response-from-backend-only-once.patch +xen-blkfront-don-t-take-local-copy-of-a-request-from-the-ring-page.patch +xen-blkfront-don-t-trust-the-backend-response-data-blindly.patch +xen-netfront-read-response-from-backend-only-once.patch +xen-netfront-don-t-read-data-from-request-on-the-ring-page.patch +xen-netfront-disentangle-tx_skb_freelist.patch +xen-netfront-don-t-trust-the-backend-response-data-blindly.patch +tty-hvc-replace-bug_on-with-negative-return-value.patch diff --git a/queue-4.4/tty-hvc-replace-bug_on-with-negative-return-value.patch b/queue-4.4/tty-hvc-replace-bug_on-with-negative-return-value.patch new file mode 100644 index 00000000000..87e3895fd47 --- 
/dev/null +++ b/queue-4.4/tty-hvc-replace-bug_on-with-negative-return-value.patch @@ -0,0 +1,61 @@ +From foo@baz Mon Nov 29 07:12:46 PM CET 2021 +From: Juergen Gross +Date: Mon, 29 Nov 2021 16:02:34 +0100 +Subject: tty: hvc: replace BUG_ON() with negative return value + +From: Juergen Gross + +commit e679004dec37566f658a255157d3aed9d762a2b7 upstream. + +Xen frontends shouldn't BUG() in case of illegal data received from +their backends. So replace the BUG_ON()s when reading illegal data from +the ring page with negative return values. + +Reviewed-by: Jan Beulich +Signed-off-by: Juergen Gross +Link: https://lore.kernel.org/r/20210707091045.460-1-jgross@suse.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/hvc/hvc_xen.c | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +--- a/drivers/tty/hvc/hvc_xen.c ++++ b/drivers/tty/hvc/hvc_xen.c +@@ -98,7 +98,11 @@ static int __write_console(struct xencon + cons = intf->out_cons; + prod = intf->out_prod; + mb(); /* update queue values before going on */ +- BUG_ON((prod - cons) > sizeof(intf->out)); ++ ++ if ((prod - cons) > sizeof(intf->out)) { ++ pr_err_once("xencons: Illegal ring page indices"); ++ return -EINVAL; ++ } + + while ((sent < len) && ((prod - cons) < sizeof(intf->out))) + intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; +@@ -126,7 +130,10 @@ static int domU_write_console(uint32_t v + */ + while (len) { + int sent = __write_console(cons, data, len); +- ++ ++ if (sent < 0) ++ return sent; ++ + data += sent; + len -= sent; + +@@ -150,7 +157,11 @@ static int domU_read_console(uint32_t vt + cons = intf->in_cons; + prod = intf->in_prod; + mb(); /* get pointers before reading ring */ +- BUG_ON((prod - cons) > sizeof(intf->in)); ++ ++ if ((prod - cons) > sizeof(intf->in)) { ++ pr_err_once("xencons: Illegal ring page indices"); ++ return -EINVAL; ++ } + + while (cons != prod && recv < len) + buf[recv++] = intf->in[MASK_XENCONS_IDX(cons++, intf->in)]; diff --git 
a/queue-4.4/xen-blkfront-don-t-take-local-copy-of-a-request-from-the-ring-page.patch b/queue-4.4/xen-blkfront-don-t-take-local-copy-of-a-request-from-the-ring-page.patch new file mode 100644 index 00000000000..670625a5edb --- /dev/null +++ b/queue-4.4/xen-blkfront-don-t-take-local-copy-of-a-request-from-the-ring-page.patch @@ -0,0 +1,104 @@ +From foo@baz Mon Nov 29 07:12:46 PM CET 2021 +From: Juergen Gross +Date: Mon, 29 Nov 2021 14:00:21 +0100 +Subject: xen/blkfront: don't take local copy of a request from the ring page + +From: Juergen Gross + +commit 8f5a695d99000fc3aa73934d7ced33cfc64dcdab upstream. + +In order to avoid a malicious backend being able to influence the local +copy of a request build the request locally first and then copy it to +the ring page instead of doing it the other way round as today. + +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Acked-by: Roger Pau Monné +Link: https://lore.kernel.org/r/20210730103854.12681-3-jgross@suse.com +Signed-off-by: Juergen Gross +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/xen-blkfront.c | 38 ++++++++++++++++++++++++++------------ + 1 file changed, 26 insertions(+), 12 deletions(-) + +--- a/drivers/block/xen-blkfront.c ++++ b/drivers/block/xen-blkfront.c +@@ -456,16 +456,31 @@ static int blkif_ioctl(struct block_devi + return 0; + } + ++static unsigned long blkif_ring_get_request(struct blkfront_info *info, ++ struct request *req, ++ struct blkif_request **ring_req) ++{ ++ unsigned long id; ++ ++ *ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); ++ /* Callers advance req_prod_pvt after building the request; not here. */ ++ ++ id = get_id_from_freelist(info); ++ info->shadow[id].request = req; ++ info->shadow[id].req.u.rw.id = id; ++ ++ return id; ++} ++ + static int blkif_queue_discard_req(struct request *req) + { + struct blkfront_info *info = req->rq_disk->private_data; +- struct blkif_request *ring_req; ++ struct blkif_request *ring_req, *final_ring_req; + unsigned long id; + + /* Fill out a communications ring
structure. */ +- ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); +- id = get_id_from_freelist(info); +- info->shadow[id].request = req; ++ id = blkif_ring_get_request(info, req, &final_ring_req); ++ ring_req = &info->shadow[id].req; + + ring_req->operation = BLKIF_OP_DISCARD; + ring_req->u.discard.nr_sectors = blk_rq_sectors(req); +@@ -478,8 +493,8 @@ static int blkif_queue_discard_req(struc + + info->ring.req_prod_pvt++; + +- /* Keep a private copy so we can reissue requests when recovering. */ +- info->shadow[id].req = *ring_req; ++ /* Copy the request to the ring page. */ ++ *final_ring_req = *ring_req; + + return 0; + } +@@ -569,7 +584,7 @@ static void blkif_setup_rw_req_grant(uns + static int blkif_queue_rw_req(struct request *req) + { + struct blkfront_info *info = req->rq_disk->private_data; +- struct blkif_request *ring_req; ++ struct blkif_request *ring_req, *final_ring_req; + unsigned long id; + int i; + struct setup_rw_req setup = { +@@ -613,9 +628,8 @@ static int blkif_queue_rw_req(struct req + new_persistent_gnts = 0; + + /* Fill out a communications ring structure. */ +- ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); +- id = get_id_from_freelist(info); +- info->shadow[id].request = req; ++ id = blkif_ring_get_request(info, req, &final_ring_req); ++ ring_req = &info->shadow[id].req; + + BUG_ON(info->max_indirect_segments == 0 && + GREFS(req->nr_phys_segments) > BLKIF_MAX_SEGMENTS_PER_REQUEST); +@@ -696,8 +710,8 @@ static int blkif_queue_rw_req(struct req + + info->ring.req_prod_pvt++; + +- /* Keep a private copy so we can reissue requests when recovering. */ +- info->shadow[id].req = *ring_req; ++ /* Copy request(s) to the ring page. 
*/ ++ *final_ring_req = *ring_req; + + if (new_persistent_gnts) + gnttab_free_grant_references(setup.gref_head); diff --git a/queue-4.4/xen-blkfront-don-t-trust-the-backend-response-data-blindly.patch b/queue-4.4/xen-blkfront-don-t-trust-the-backend-response-data-blindly.patch new file mode 100644 index 00000000000..65d7c7aa41a --- /dev/null +++ b/queue-4.4/xen-blkfront-don-t-trust-the-backend-response-data-blindly.patch @@ -0,0 +1,181 @@ +From foo@baz Mon Nov 29 07:12:46 PM CET 2021 +From: Juergen Gross +Date: Mon, 29 Nov 2021 14:18:20 +0100 +Subject: xen/blkfront: don't trust the backend response data blindly + +From: Juergen Gross + +commit b94e4b147fd1992ad450e1fea1fdaa3738753373 upstream. + +Today blkfront will trust the backend to send only sane response data. +In order to avoid privilege escalations or crashes in case of malicious +backends verify the data to be within expected limits. Especially make +sure that the response always references an outstanding request. + +Introduce a new state of the ring BLKIF_STATE_ERROR which will be +switched to in case an inconsistency is being detected. Recovering from +this state is possible only via removing and adding the virtual device +again (e.g. via a suspend/resume cycle). + +Make all warning messages issued due to valid error responses rate +limited in order to avoid message floods being triggered by a malicious +backend. 
+ +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Acked-by: Roger Pau Monné +Link: https://lore.kernel.org/r/20210730103854.12681-4-jgross@suse.com +Signed-off-by: Juergen Gross +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/xen-blkfront.c | 57 ++++++++++++++++++++++++++++++++++--------- + 1 file changed, 46 insertions(+), 11 deletions(-) + +--- a/drivers/block/xen-blkfront.c ++++ b/drivers/block/xen-blkfront.c +@@ -64,6 +64,7 @@ enum blkif_state { + BLKIF_STATE_DISCONNECTED, + BLKIF_STATE_CONNECTED, + BLKIF_STATE_SUSPENDED, ++ BLKIF_STATE_ERROR, + }; + + struct grant { +@@ -79,6 +80,7 @@ struct blk_shadow { + struct grant **indirect_grants; + struct scatterlist *sg; + unsigned int num_sg; ++ bool inflight; + }; + + struct split_bio { +@@ -495,6 +497,7 @@ static int blkif_queue_discard_req(struc + + /* Copy the request to the ring page. */ + *final_ring_req = *ring_req; ++ info->shadow[id].inflight = true; + + return 0; + } +@@ -712,6 +715,7 @@ static int blkif_queue_rw_req(struct req + + /* Copy request(s) to the ring page. */ + *final_ring_req = *ring_req; ++ info->shadow[id].inflight = true; + + if (new_persistent_gnts) + gnttab_free_grant_references(setup.gref_head); +@@ -1324,11 +1328,17 @@ static irqreturn_t blkif_interrupt(int i + } + + again: +- rp = info->ring.sring->rsp_prod; ++ rp = READ_ONCE(info->ring.sring->rsp_prod); + rmb(); /* Ensure we see queued responses up to 'rp'. */ ++ if (RING_RESPONSE_PROD_OVERFLOW(&info->ring, rp)) { ++ pr_alert("%s: illegal number of responses %u\n", ++ info->gd->disk_name, rp - info->ring.rsp_cons); ++ goto err; ++ } + + for (i = info->ring.rsp_cons; i != rp; i++) { + unsigned long id; ++ unsigned int op; + + RING_COPY_RESPONSE(&info->ring, i, &bret); + id = bret.id; +@@ -1339,14 +1349,28 @@ static irqreturn_t blkif_interrupt(int i + * look in get_id_from_freelist. 
+ */ + if (id >= BLK_RING_SIZE(info)) { +- WARN(1, "%s: response to %s has incorrect id (%ld)\n", +- info->gd->disk_name, op_name(bret.operation), id); +- /* We can't safely get the 'struct request' as +- * the id is busted. */ +- continue; ++ pr_alert("%s: response has incorrect id (%ld)\n", ++ info->gd->disk_name, id); ++ goto err; ++ } ++ if (!info->shadow[id].inflight) { ++ pr_alert("%s: response references no pending request\n", ++ info->gd->disk_name); ++ goto err; + } ++ ++ info->shadow[id].inflight = false; + req = info->shadow[id].request; + ++ op = info->shadow[id].req.operation; ++ if (op == BLKIF_OP_INDIRECT) ++ op = info->shadow[id].req.u.indirect.indirect_op; ++ if (bret.operation != op) { ++ pr_alert("%s: response has wrong operation (%u instead of %u)\n", ++ info->gd->disk_name, bret.operation, op); ++ goto err; ++ } ++ + if (bret.operation != BLKIF_OP_DISCARD) + blkif_completion(&info->shadow[id], info, &bret); + +@@ -1361,7 +1385,8 @@ static irqreturn_t blkif_interrupt(int i + case BLKIF_OP_DISCARD: + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { + struct request_queue *rq = info->rq; +- printk(KERN_WARNING "blkfront: %s: %s op failed\n", ++ ++ pr_warn_ratelimited("blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); + error = -EOPNOTSUPP; + info->feature_discard = 0; +@@ -1374,13 +1399,13 @@ static irqreturn_t blkif_interrupt(int i + case BLKIF_OP_FLUSH_DISKCACHE: + case BLKIF_OP_WRITE_BARRIER: + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { +- printk(KERN_WARNING "blkfront: %s: %s op failed\n", ++ pr_warn_ratelimited("blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); + error = -EOPNOTSUPP; + } + if (unlikely(bret.status == BLKIF_RSP_ERROR && + info->shadow[id].req.u.rw.nr_segments == 0)) { +- printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", ++ pr_warn_ratelimited("blkfront: %s: empty %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); + error = 
-EOPNOTSUPP; + } +@@ -1394,8 +1419,9 @@ static irqreturn_t blkif_interrupt(int i + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + if (unlikely(bret.status != BLKIF_RSP_OKAY)) +- dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " +- "request: %x\n", bret.status); ++ dev_dbg_ratelimited(&info->xbdev->dev, ++ "Bad return from blkdev data request: %x\n", ++ bret.status); + + blk_mq_complete_request(req, error); + break; +@@ -1419,6 +1445,14 @@ static irqreturn_t blkif_interrupt(int i + spin_unlock_irqrestore(&info->io_lock, flags); + + return IRQ_HANDLED; ++ ++ err: ++ info->connected = BLKIF_STATE_ERROR; ++ ++ spin_unlock_irqrestore(&info->io_lock, flags); ++ ++ pr_alert("%s disabled for further use\n", info->gd->disk_name); ++ return IRQ_HANDLED; + } + + +@@ -1928,6 +1962,7 @@ out_of_memory: + info->shadow[i].sg = NULL; + kfree(info->shadow[i].indirect_grants); + info->shadow[i].indirect_grants = NULL; ++ info->shadow[i].inflight = false; + } + if (!list_empty(&info->indirect_pages)) { + struct page *indirect_page, *n; diff --git a/queue-4.4/xen-blkfront-read-response-from-backend-only-once.patch b/queue-4.4/xen-blkfront-read-response-from-backend-only-once.patch new file mode 100644 index 00000000000..4679838166f --- /dev/null +++ b/queue-4.4/xen-blkfront-read-response-from-backend-only-once.patch @@ -0,0 +1,117 @@ +From foo@baz Mon Nov 29 07:12:46 PM CET 2021 +From: Juergen Gross +Date: Mon, 29 Nov 2021 13:46:12 +0100 +Subject: xen/blkfront: read response from backend only once + +From: Juergen Gross + +commit 71b66243f9898d0e54296b4e7035fb33cdcb0707 upstream. + +In order to avoid problems in case the backend is modifying a response +on the ring page while the frontend has already seen it, just read the +response into a local buffer in one go and then operate on that buffer +only. 
+ +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Acked-by: Roger Pau Monné +Link: https://lore.kernel.org/r/20210730103854.12681-2-jgross@suse.com +Signed-off-by: Juergen Gross +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/xen-blkfront.c | 35 ++++++++++++++++++----------------- + 1 file changed, 18 insertions(+), 17 deletions(-) + +--- a/drivers/block/xen-blkfront.c ++++ b/drivers/block/xen-blkfront.c +@@ -1296,7 +1296,7 @@ static void blkif_completion(struct blk_ + static irqreturn_t blkif_interrupt(int irq, void *dev_id) + { + struct request *req; +- struct blkif_response *bret; ++ struct blkif_response bret; + RING_IDX i, rp; + unsigned long flags; + struct blkfront_info *info = (struct blkfront_info *)dev_id; +@@ -1316,8 +1316,9 @@ static irqreturn_t blkif_interrupt(int i + for (i = info->ring.rsp_cons; i != rp; i++) { + unsigned long id; + +- bret = RING_GET_RESPONSE(&info->ring, i); +- id = bret->id; ++ RING_COPY_RESPONSE(&info->ring, i, &bret); ++ id = bret.id; ++ + /* + * The backend has messed up and given us an id that we would + * never have given to it (we stamp it up to BLK_RING_SIZE - +@@ -1325,29 +1326,29 @@ static irqreturn_t blkif_interrupt(int i + */ + if (id >= BLK_RING_SIZE(info)) { + WARN(1, "%s: response to %s has incorrect id (%ld)\n", +- info->gd->disk_name, op_name(bret->operation), id); ++ info->gd->disk_name, op_name(bret.operation), id); + /* We can't safely get the 'struct request' as + * the id is busted. 
*/ + continue; + } + req = info->shadow[id].request; + +- if (bret->operation != BLKIF_OP_DISCARD) +- blkif_completion(&info->shadow[id], info, bret); ++ if (bret.operation != BLKIF_OP_DISCARD) ++ blkif_completion(&info->shadow[id], info, &bret); + + if (add_id_to_freelist(info, id)) { + WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", +- info->gd->disk_name, op_name(bret->operation), id); ++ info->gd->disk_name, op_name(bret.operation), id); + continue; + } + +- error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; +- switch (bret->operation) { ++ error = (bret.status == BLKIF_RSP_OKAY) ? 0 : -EIO; ++ switch (bret.operation) { + case BLKIF_OP_DISCARD: +- if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { ++ if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { + struct request_queue *rq = info->rq; + printk(KERN_WARNING "blkfront: %s: %s op failed\n", +- info->gd->disk_name, op_name(bret->operation)); ++ info->gd->disk_name, op_name(bret.operation)); + error = -EOPNOTSUPP; + info->feature_discard = 0; + info->feature_secdiscard = 0; +@@ -1358,15 +1359,15 @@ static irqreturn_t blkif_interrupt(int i + break; + case BLKIF_OP_FLUSH_DISKCACHE: + case BLKIF_OP_WRITE_BARRIER: +- if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { ++ if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { + printk(KERN_WARNING "blkfront: %s: %s op failed\n", +- info->gd->disk_name, op_name(bret->operation)); ++ info->gd->disk_name, op_name(bret.operation)); + error = -EOPNOTSUPP; + } +- if (unlikely(bret->status == BLKIF_RSP_ERROR && ++ if (unlikely(bret.status == BLKIF_RSP_ERROR && + info->shadow[id].req.u.rw.nr_segments == 0)) { + printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", +- info->gd->disk_name, op_name(bret->operation)); ++ info->gd->disk_name, op_name(bret.operation)); + error = -EOPNOTSUPP; + } + if (unlikely(error)) { +@@ -1378,9 +1379,9 @@ static irqreturn_t blkif_interrupt(int i + /* fall through */ + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: +- if 
(unlikely(bret->status != BLKIF_RSP_OKAY)) ++ if (unlikely(bret.status != BLKIF_RSP_OKAY)) + dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " +- "request: %x\n", bret->status); ++ "request: %x\n", bret.status); + + blk_mq_complete_request(req, error); + break; diff --git a/queue-4.4/xen-netfront-disentangle-tx_skb_freelist.patch b/queue-4.4/xen-netfront-disentangle-tx_skb_freelist.patch new file mode 100644 index 00000000000..41f34e83f6a --- /dev/null +++ b/queue-4.4/xen-netfront-disentangle-tx_skb_freelist.patch @@ -0,0 +1,179 @@ +From foo@baz Mon Nov 29 07:12:46 PM CET 2021 +From: Juergen Gross +Date: Mon, 29 Nov 2021 15:58:51 +0100 +Subject: xen/netfront: disentangle tx_skb_freelist + +From: Juergen Gross + +commit 21631d2d741a64a073e167c27769e73bc7844a2f upstream. + +The tx_skb_freelist elements are in a single linked list with the +request id used as link reference. The per element link field is in a +union with the skb pointer of an in use request. + +Move the link reference out of the union in order to enable a later +reuse of it for requests which need a populated skb pointer. + +Rename add_id_to_freelist() and get_id_from_freelist() to +add_id_to_list() and get_id_from_list() in order to prepare using +those for other lists as well. Define ~0 as value to indicate the end +of a list and place that value into the link for a request not being +on the list. + +When freeing a skb zero the skb pointer in the request. Use a NULL +value of the skb pointer instead of skb_entry_is_link() for deciding +whether a request has a skb linked to it. + +Remove skb_entry_set_link() and open code it instead as it is really +trivial now. + +Signed-off-by: Juergen Gross +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netfront.c | 61 ++++++++++++++++++--------------------------- + 1 file changed, 25 insertions(+), 36 deletions(-) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -120,17 +120,11 @@ struct netfront_queue { + + /* + * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries +- * are linked from tx_skb_freelist through skb_entry.link. +- * +- * NB. Freelist index entries are always going to be less than +- * PAGE_OFFSET, whereas pointers to skbs will always be equal or +- * greater than PAGE_OFFSET: we use this property to distinguish +- * them. ++ * are linked from tx_skb_freelist through tx_link. + */ +- union skb_entry { +- struct sk_buff *skb; +- unsigned long link; +- } tx_skbs[NET_TX_RING_SIZE]; ++ struct sk_buff *tx_skbs[NET_TX_RING_SIZE]; ++ unsigned short tx_link[NET_TX_RING_SIZE]; ++#define TX_LINK_NONE 0xffff + grant_ref_t gref_tx_head; + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; + struct page *grant_tx_page[NET_TX_RING_SIZE]; +@@ -168,33 +162,25 @@ struct netfront_rx_info { + struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; + }; + +-static void skb_entry_set_link(union skb_entry *list, unsigned short id) +-{ +- list->link = id; +-} +- +-static int skb_entry_is_link(const union skb_entry *list) +-{ +- BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link)); +- return (unsigned long)list->skb < PAGE_OFFSET; +-} +- + /* + * Access macros for acquiring freeing slots in tx_skbs[]. 
+ */ + +-static void add_id_to_freelist(unsigned *head, union skb_entry *list, +- unsigned short id) ++static void add_id_to_list(unsigned *head, unsigned short *list, ++ unsigned short id) + { +- skb_entry_set_link(&list[id], *head); ++ list[id] = *head; + *head = id; + } + +-static unsigned short get_id_from_freelist(unsigned *head, +- union skb_entry *list) ++static unsigned short get_id_from_list(unsigned *head, unsigned short *list) + { + unsigned int id = *head; +- *head = list[id].link; ++ ++ if (id != TX_LINK_NONE) { ++ *head = list[id]; ++ list[id] = TX_LINK_NONE; ++ } + return id; + } + +@@ -394,7 +380,8 @@ static void xennet_tx_buf_gc(struct netf + continue; + + id = txrsp.id; +- skb = queue->tx_skbs[id].skb; ++ skb = queue->tx_skbs[id]; ++ queue->tx_skbs[id] = NULL; + if (unlikely(gnttab_query_foreign_access( + queue->grant_tx_ref[id]) != 0)) { + pr_alert("%s: warning -- grant still in use by backend domain\n", +@@ -407,7 +394,7 @@ static void xennet_tx_buf_gc(struct netf + &queue->gref_tx_head, queue->grant_tx_ref[id]); + queue->grant_tx_ref[id] = GRANT_INVALID_REF; + queue->grant_tx_page[id] = NULL; +- add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id); ++ add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, id); + dev_kfree_skb_irq(skb); + } + +@@ -450,7 +437,7 @@ static void xennet_tx_setup_grant(unsign + struct netfront_queue *queue = info->queue; + struct sk_buff *skb = info->skb; + +- id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs); ++ id = get_id_from_list(&queue->tx_skb_freelist, queue->tx_link); + tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); + ref = gnttab_claim_grant_reference(&queue->gref_tx_head); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref)); +@@ -458,7 +445,7 @@ static void xennet_tx_setup_grant(unsign + gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id, + gfn, GNTMAP_readonly); + +- queue->tx_skbs[id].skb = skb; ++ queue->tx_skbs[id] = skb; + 
queue->grant_tx_page[id] = page; + queue->grant_tx_ref[id] = ref; + +@@ -1126,17 +1113,18 @@ static void xennet_release_tx_bufs(struc + + for (i = 0; i < NET_TX_RING_SIZE; i++) { + /* Skip over entries which are actually freelist references */ +- if (skb_entry_is_link(&queue->tx_skbs[i])) ++ if (!queue->tx_skbs[i]) + continue; + +- skb = queue->tx_skbs[i].skb; ++ skb = queue->tx_skbs[i]; ++ queue->tx_skbs[i] = NULL; + get_page(queue->grant_tx_page[i]); + gnttab_end_foreign_access(queue->grant_tx_ref[i], + GNTMAP_readonly, + (unsigned long)page_address(queue->grant_tx_page[i])); + queue->grant_tx_page[i] = NULL; + queue->grant_tx_ref[i] = GRANT_INVALID_REF; +- add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i); ++ add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, i); + dev_kfree_skb_irq(skb); + } + } +@@ -1637,13 +1625,14 @@ static int xennet_init_queue(struct netf + snprintf(queue->name, sizeof(queue->name), "vif%s-q%u", + devid, queue->id); + +- /* Initialise tx_skbs as a free chain containing every entry. */ ++ /* Initialise tx_skb_freelist as a free chain containing every entry. 
*/ + queue->tx_skb_freelist = 0; + for (i = 0; i < NET_TX_RING_SIZE; i++) { +- skb_entry_set_link(&queue->tx_skbs[i], i+1); ++ queue->tx_link[i] = i + 1; + queue->grant_tx_ref[i] = GRANT_INVALID_REF; + queue->grant_tx_page[i] = NULL; + } ++ queue->tx_link[NET_TX_RING_SIZE - 1] = TX_LINK_NONE; + + /* Clear out rx_skbs */ + for (i = 0; i < NET_RX_RING_SIZE; i++) { diff --git a/queue-4.4/xen-netfront-don-t-read-data-from-request-on-the-ring-page.patch b/queue-4.4/xen-netfront-don-t-read-data-from-request-on-the-ring-page.patch new file mode 100644 index 00000000000..0dbe32f3bf2 --- /dev/null +++ b/queue-4.4/xen-netfront-don-t-read-data-from-request-on-the-ring-page.patch @@ -0,0 +1,195 @@ +From foo@baz Mon Nov 29 07:12:46 PM CET 2021 +From: Juergen Gross +Date: Mon, 29 Nov 2021 15:58:09 +0100 +Subject: xen/netfront: don't read data from request on the ring page + +From: Juergen Gross + +commit 162081ec33c2686afa29d91bf8d302824aa846c7 upstream. + +In order to avoid a malicious backend being able to influence the local +processing of a request build the request locally first and then copy +it to the ring page. Any reading from the request influencing the +processing in the frontend needs to be done on the local instance. + +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netfront.c | 80 ++++++++++++++++++++------------------------- + 1 file changed, 37 insertions(+), 43 deletions(-) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -433,7 +433,8 @@ struct xennet_gnttab_make_txreq { + struct netfront_queue *queue; + struct sk_buff *skb; + struct page *page; +- struct xen_netif_tx_request *tx; /* Last request */ ++ struct xen_netif_tx_request *tx; /* Last request on ring page */ ++ struct xen_netif_tx_request tx_local; /* Last request local copy*/ + unsigned int size; + }; + +@@ -461,30 +462,27 @@ static void xennet_tx_setup_grant(unsign + queue->grant_tx_page[id] = page; + queue->grant_tx_ref[id] = ref; + +- tx->id = id; +- tx->gref = ref; +- tx->offset = offset; +- tx->size = len; +- tx->flags = 0; ++ info->tx_local.id = id; ++ info->tx_local.gref = ref; ++ info->tx_local.offset = offset; ++ info->tx_local.size = len; ++ info->tx_local.flags = 0; ++ ++ *tx = info->tx_local; + + info->tx = tx; +- info->size += tx->size; ++ info->size += info->tx_local.size; + } + + static struct xen_netif_tx_request *xennet_make_first_txreq( +- struct netfront_queue *queue, struct sk_buff *skb, +- struct page *page, unsigned int offset, unsigned int len) ++ struct xennet_gnttab_make_txreq *info, ++ unsigned int offset, unsigned int len) + { +- struct xennet_gnttab_make_txreq info = { +- .queue = queue, +- .skb = skb, +- .page = page, +- .size = 0, +- }; ++ info->size = 0; + +- gnttab_for_one_grant(page, offset, len, xennet_tx_setup_grant, &info); ++ gnttab_for_one_grant(info->page, offset, len, xennet_tx_setup_grant, info); + +- return info.tx; ++ return info->tx; + } + + static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset, +@@ -497,35 +495,27 @@ static void xennet_make_one_txreq(unsign + xennet_tx_setup_grant(gfn, offset, len, data); + } + +-static struct xen_netif_tx_request *xennet_make_txreqs( +- struct netfront_queue *queue, struct 
xen_netif_tx_request *tx, +- struct sk_buff *skb, struct page *page, ++static void xennet_make_txreqs( ++ struct xennet_gnttab_make_txreq *info, ++ struct page *page, + unsigned int offset, unsigned int len) + { +- struct xennet_gnttab_make_txreq info = { +- .queue = queue, +- .skb = skb, +- .tx = tx, +- }; +- + /* Skip unused frames from start of page */ + page += offset >> PAGE_SHIFT; + offset &= ~PAGE_MASK; + + while (len) { +- info.page = page; +- info.size = 0; ++ info->page = page; ++ info->size = 0; + + gnttab_foreach_grant_in_range(page, offset, len, + xennet_make_one_txreq, +- &info); ++ info); + + page++; + offset = 0; +- len -= info.size; ++ len -= info->size; + } +- +- return info.tx; + } + + /* +@@ -578,7 +568,7 @@ static int xennet_start_xmit(struct sk_b + { + struct netfront_info *np = netdev_priv(dev); + struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats); +- struct xen_netif_tx_request *tx, *first_tx; ++ struct xen_netif_tx_request *first_tx; + unsigned int i; + int notify; + int slots; +@@ -587,6 +577,7 @@ static int xennet_start_xmit(struct sk_b + unsigned int len; + unsigned long flags; + struct netfront_queue *queue = NULL; ++ struct xennet_gnttab_make_txreq info = { }; + unsigned int num_queues = dev->real_num_tx_queues; + u16 queue_index; + +@@ -629,21 +620,24 @@ static int xennet_start_xmit(struct sk_b + } + + /* First request for the linear area. */ +- first_tx = tx = xennet_make_first_txreq(queue, skb, +- page, offset, len); +- offset += tx->size; ++ info.queue = queue; ++ info.skb = skb; ++ info.page = page; ++ first_tx = xennet_make_first_txreq(&info, offset, len); ++ offset += info.tx_local.size; + if (offset == PAGE_SIZE) { + page++; + offset = 0; + } +- len -= tx->size; ++ len -= info.tx_local.size; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + /* local packet? 
*/ +- tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated; ++ first_tx->flags |= XEN_NETTXF_csum_blank | ++ XEN_NETTXF_data_validated; + else if (skb->ip_summed == CHECKSUM_UNNECESSARY) + /* remote but checksummed. */ +- tx->flags |= XEN_NETTXF_data_validated; ++ first_tx->flags |= XEN_NETTXF_data_validated; + + /* Optional extra info after the first request. */ + if (skb_shinfo(skb)->gso_size) { +@@ -652,7 +646,7 @@ static int xennet_start_xmit(struct sk_b + gso = (struct xen_netif_extra_info *) + RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); + +- tx->flags |= XEN_NETTXF_extra_info; ++ first_tx->flags |= XEN_NETTXF_extra_info; + + gso->u.gso.size = skb_shinfo(skb)->gso_size; + gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ? +@@ -666,13 +660,13 @@ static int xennet_start_xmit(struct sk_b + } + + /* Requests for the rest of the linear area. */ +- tx = xennet_make_txreqs(queue, tx, skb, page, offset, len); ++ xennet_make_txreqs(&info, page, offset, len); + + /* Requests for all the frags. */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; +- tx = xennet_make_txreqs(queue, tx, skb, +- skb_frag_page(frag), frag->page_offset, ++ xennet_make_txreqs(&info, skb_frag_page(frag), ++ frag->page_offset, + skb_frag_size(frag)); + } + diff --git a/queue-4.4/xen-netfront-don-t-trust-the-backend-response-data-blindly.patch b/queue-4.4/xen-netfront-don-t-trust-the-backend-response-data-blindly.patch new file mode 100644 index 00000000000..06dc027403c --- /dev/null +++ b/queue-4.4/xen-netfront-don-t-trust-the-backend-response-data-blindly.patch @@ -0,0 +1,237 @@ +From foo@baz Mon Nov 29 07:12:46 PM CET 2021 +From: Juergen Gross +Date: Mon, 29 Nov 2021 15:59:33 +0100 +Subject: xen/netfront: don't trust the backend response data blindly + +From: Juergen Gross + +commit a884daa61a7d91650987e855464526aef219590f upstream. + +Today netfront will trust the backend to send only sane response data. 
+In order to avoid privilege escalations or crashes in case of malicious +backends verify the data to be within expected limits. Especially make +sure that the response always references an outstanding request. + +Note that only the tx queue needs special id handling, as for the rx +queue the id is equal to the index in the ring page. + +Introduce a new indicator for the device whether it is broken and let +the device stop working when it is set. Set this indicator in case the +backend sets any weird data. + + +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netfront.c | 80 ++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 75 insertions(+), 5 deletions(-) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -125,10 +125,12 @@ struct netfront_queue { + struct sk_buff *tx_skbs[NET_TX_RING_SIZE]; + unsigned short tx_link[NET_TX_RING_SIZE]; + #define TX_LINK_NONE 0xffff ++#define TX_PENDING 0xfffe + grant_ref_t gref_tx_head; + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; + struct page *grant_tx_page[NET_TX_RING_SIZE]; + unsigned tx_skb_freelist; ++ unsigned int tx_pend_queue; + + spinlock_t rx_lock ____cacheline_aligned_in_smp; + struct xen_netif_rx_front_ring rx; +@@ -154,6 +156,9 @@ struct netfront_info { + struct netfront_stats __percpu *rx_stats; + struct netfront_stats __percpu *tx_stats; + ++ /* Is device behaving sane? 
*/ ++ bool broken; ++ + atomic_t rx_gso_checksum_fixup; + }; + +@@ -338,7 +343,7 @@ static int xennet_open(struct net_device + unsigned int i = 0; + struct netfront_queue *queue = NULL; + +- if (!np->queues) ++ if (!np->queues || np->broken) + return -ENODEV; + + for (i = 0; i < num_queues; ++i) { +@@ -365,11 +370,17 @@ static void xennet_tx_buf_gc(struct netf + RING_IDX cons, prod; + unsigned short id; + struct sk_buff *skb; ++ const struct device *dev = &queue->info->netdev->dev; + + BUG_ON(!netif_carrier_ok(queue->info->netdev)); + + do { + prod = queue->tx.sring->rsp_prod; ++ if (RING_RESPONSE_PROD_OVERFLOW(&queue->tx, prod)) { ++ dev_alert(dev, "Illegal number of responses %u\n", ++ prod - queue->tx.rsp_cons); ++ goto err; ++ } + rmb(); /* Ensure we see responses up to 'rp'. */ + + for (cons = queue->tx.rsp_cons; cons != prod; cons++) { +@@ -379,14 +390,27 @@ static void xennet_tx_buf_gc(struct netf + if (txrsp.status == XEN_NETIF_RSP_NULL) + continue; + +- id = txrsp.id; ++ id = txrsp.id; ++ if (id >= RING_SIZE(&queue->tx)) { ++ dev_alert(dev, ++ "Response has incorrect id (%u)\n", ++ id); ++ goto err; ++ } ++ if (queue->tx_link[id] != TX_PENDING) { ++ dev_alert(dev, ++ "Response for inactive request\n"); ++ goto err; ++ } ++ ++ queue->tx_link[id] = TX_LINK_NONE; + skb = queue->tx_skbs[id]; + queue->tx_skbs[id] = NULL; + if (unlikely(gnttab_query_foreign_access( + queue->grant_tx_ref[id]) != 0)) { +- pr_alert("%s: warning -- grant still in use by backend domain\n", +- __func__); +- BUG(); ++ dev_alert(dev, ++ "Grant still in use by backend domain\n"); ++ goto err; + } + gnttab_end_foreign_access_ref( + queue->grant_tx_ref[id], GNTMAP_readonly); +@@ -414,6 +438,12 @@ static void xennet_tx_buf_gc(struct netf + } while ((cons == prod) && (prod != queue->tx.sring->rsp_prod)); + + xennet_maybe_wake_tx(queue); ++ ++ return; ++ ++ err: ++ queue->info->broken = true; ++ dev_alert(dev, "Disabled for further use\n"); + } + + struct xennet_gnttab_make_txreq { +@@ -457,6 
+487,12 @@ static void xennet_tx_setup_grant(unsign + + *tx = info->tx_local; + ++ /* ++ * Put the request in the pending queue, it will be set to be pending ++ * when the producer index is about to be raised. ++ */ ++ add_id_to_list(&queue->tx_pend_queue, queue->tx_link, id); ++ + info->tx = tx; + info->size += info->tx_local.size; + } +@@ -549,6 +585,15 @@ static u16 xennet_select_queue(struct ne + return queue_idx; + } + ++static void xennet_mark_tx_pending(struct netfront_queue *queue) ++{ ++ unsigned int i; ++ ++ while ((i = get_id_from_list(&queue->tx_pend_queue, queue->tx_link)) != ++ TX_LINK_NONE) ++ queue->tx_link[i] = TX_PENDING; ++} ++ + #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1) + + static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) +@@ -571,6 +616,8 @@ static int xennet_start_xmit(struct sk_b + /* Drop the packet if no queues are set up */ + if (num_queues < 1) + goto drop; ++ if (unlikely(np->broken)) ++ goto drop; + /* Determine which queue to transmit this SKB on */ + queue_index = skb_get_queue_mapping(skb); + queue = &np->queues[queue_index]; +@@ -660,6 +707,8 @@ static int xennet_start_xmit(struct sk_b + /* First request has the packet length. */ + first_tx->size = skb->len; + ++ xennet_mark_tx_pending(queue); ++ + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify); + if (notify) + notify_remote_via_irq(queue->tx_irq); +@@ -984,6 +1033,13 @@ static int xennet_poll(struct napi_struc + skb_queue_head_init(&tmpq); + + rp = queue->rx.sring->rsp_prod; ++ if (RING_RESPONSE_PROD_OVERFLOW(&queue->rx, rp)) { ++ dev_alert(&dev->dev, "Illegal number of responses %u\n", ++ rp - queue->rx.rsp_cons); ++ queue->info->broken = true; ++ spin_unlock(&queue->rx_lock); ++ return 0; ++ } + rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ + + i = queue->rx.rsp_cons; +@@ -1224,6 +1280,9 @@ static irqreturn_t xennet_tx_interrupt(i + struct netfront_queue *queue = dev_id; + unsigned long flags; + ++ if (queue->info->broken) ++ return IRQ_HANDLED; ++ + spin_lock_irqsave(&queue->tx_lock, flags); + xennet_tx_buf_gc(queue); + spin_unlock_irqrestore(&queue->tx_lock, flags); +@@ -1236,6 +1295,9 @@ static irqreturn_t xennet_rx_interrupt(i + struct netfront_queue *queue = dev_id; + struct net_device *dev = queue->info->netdev; + ++ if (queue->info->broken) ++ return IRQ_HANDLED; ++ + if (likely(netif_carrier_ok(dev) && + RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))) + napi_schedule(&queue->napi); +@@ -1257,6 +1319,10 @@ static void xennet_poll_controller(struc + struct netfront_info *info = netdev_priv(dev); + unsigned int num_queues = dev->real_num_tx_queues; + unsigned int i; ++ ++ if (info->broken) ++ return; ++ + for (i = 0; i < num_queues; ++i) + xennet_interrupt(0, &info->queues[i]); + } +@@ -1627,6 +1693,7 @@ static int xennet_init_queue(struct netf + + /* Initialise tx_skb_freelist as a free chain containing every entry. */ + queue->tx_skb_freelist = 0; ++ queue->tx_pend_queue = TX_LINK_NONE; + for (i = 0; i < NET_TX_RING_SIZE; i++) { + queue->tx_link[i] = i + 1; + queue->grant_tx_ref[i] = GRANT_INVALID_REF; +@@ -1842,6 +1909,9 @@ static int talk_to_netback(struct xenbus + if (info->queues) + xennet_destroy_queues(info); + ++ /* For the case of a reconnect reset the "broken" indicator. 
*/ ++ info->broken = false; ++ + err = xennet_create_queues(info, &num_queues); + if (err < 0) { + xenbus_dev_fatal(dev, err, "creating queues"); diff --git a/queue-4.4/xen-netfront-read-response-from-backend-only-once.patch b/queue-4.4/xen-netfront-read-response-from-backend-only-once.patch new file mode 100644 index 00000000000..26f2135b8ae --- /dev/null +++ b/queue-4.4/xen-netfront-read-response-from-backend-only-once.patch @@ -0,0 +1,133 @@ +From foo@baz Mon Nov 29 07:12:46 PM CET 2021 +From: Juergen Gross +Date: Mon, 29 Nov 2021 15:57:03 +0100 +Subject: xen/netfront: read response from backend only once + +From: Juergen Gross + +commit 8446066bf8c1f9f7b7412c43fbea0fb87464d75b upstream. + +In order to avoid problems in case the backend is modifying a response +on the ring page while the frontend has already seen it, just read the +response into a local buffer in one go and then operate on that buffer +only. + +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netfront.c | 38 +++++++++++++++++++------------------- + 1 file changed, 19 insertions(+), 19 deletions(-) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -387,13 +387,13 @@ static void xennet_tx_buf_gc(struct netf + rmb(); /* Ensure we see responses up to 'rp'. 
*/ + + for (cons = queue->tx.rsp_cons; cons != prod; cons++) { +- struct xen_netif_tx_response *txrsp; ++ struct xen_netif_tx_response txrsp; + +- txrsp = RING_GET_RESPONSE(&queue->tx, cons); +- if (txrsp->status == XEN_NETIF_RSP_NULL) ++ RING_COPY_RESPONSE(&queue->tx, cons, &txrsp); ++ if (txrsp.status == XEN_NETIF_RSP_NULL) + continue; + +- id = txrsp->id; ++ id = txrsp.id; + skb = queue->tx_skbs[id].skb; + if (unlikely(gnttab_query_foreign_access( + queue->grant_tx_ref[id]) != 0)) { +@@ -736,7 +736,7 @@ static int xennet_get_extras(struct netf + RING_IDX rp) + + { +- struct xen_netif_extra_info *extra; ++ struct xen_netif_extra_info extra; + struct device *dev = &queue->info->netdev->dev; + RING_IDX cons = queue->rx.rsp_cons; + int err = 0; +@@ -752,24 +752,22 @@ static int xennet_get_extras(struct netf + break; + } + +- extra = (struct xen_netif_extra_info *) +- RING_GET_RESPONSE(&queue->rx, ++cons); ++ RING_COPY_RESPONSE(&queue->rx, ++cons, &extra); + +- if (unlikely(!extra->type || +- extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { ++ if (unlikely(!extra.type || ++ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { + if (net_ratelimit()) + dev_warn(dev, "Invalid extra type: %d\n", +- extra->type); ++ extra.type); + err = -EINVAL; + } else { +- memcpy(&extras[extra->type - 1], extra, +- sizeof(*extra)); ++ extras[extra.type - 1] = extra; + } + + skb = xennet_get_rx_skb(queue, cons); + ref = xennet_get_rx_ref(queue, cons); + xennet_move_rx_slot(queue, skb, ref); +- } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); ++ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); + + queue->rx.rsp_cons = cons; + return err; +@@ -779,7 +777,7 @@ static int xennet_get_responses(struct n + struct netfront_rx_info *rinfo, RING_IDX rp, + struct sk_buff_head *list) + { +- struct xen_netif_rx_response *rx = &rinfo->rx; ++ struct xen_netif_rx_response *rx = &rinfo->rx, rx_local; + struct xen_netif_extra_info *extras = rinfo->extras; + struct device *dev = &queue->info->netdev->dev; + 
RING_IDX cons = queue->rx.rsp_cons; +@@ -837,7 +835,8 @@ next: + break; + } + +- rx = RING_GET_RESPONSE(&queue->rx, cons + slots); ++ RING_COPY_RESPONSE(&queue->rx, cons + slots, &rx_local); ++ rx = &rx_local; + skb = xennet_get_rx_skb(queue, cons + slots); + ref = xennet_get_rx_ref(queue, cons + slots); + slots++; +@@ -892,10 +891,11 @@ static int xennet_fill_frags(struct netf + struct sk_buff *nskb; + + while ((nskb = __skb_dequeue(list))) { +- struct xen_netif_rx_response *rx = +- RING_GET_RESPONSE(&queue->rx, ++cons); ++ struct xen_netif_rx_response rx; + skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0]; + ++ RING_COPY_RESPONSE(&queue->rx, ++cons, &rx); ++ + if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) { + unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; + +@@ -910,7 +910,7 @@ static int xennet_fill_frags(struct netf + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + skb_frag_page(nfrag), +- rx->offset, rx->status, PAGE_SIZE); ++ rx.offset, rx.status, PAGE_SIZE); + + skb_shinfo(nskb)->nr_frags = 0; + kfree_skb(nskb); +@@ -1008,7 +1008,7 @@ static int xennet_poll(struct napi_struc + i = queue->rx.rsp_cons; + work_done = 0; + while ((i != rp) && (work_done < budget)) { +- memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx)); ++ RING_COPY_RESPONSE(&queue->rx, i, rx); + memset(extras, 0, sizeof(rinfo.extras)); + + err = xennet_get_responses(queue, &rinfo, rp, &tmpq); diff --git a/queue-4.4/xen-sync-include-xen-interface-io-ring.h-with-xen-s-newest-version.patch b/queue-4.4/xen-sync-include-xen-interface-io-ring.h-with-xen-s-newest-version.patch new file mode 100644 index 00000000000..75cf517a41a --- /dev/null +++ b/queue-4.4/xen-sync-include-xen-interface-io-ring.h-with-xen-s-newest-version.patch @@ -0,0 +1,382 @@ +From foo@baz Mon Nov 29 07:12:46 PM CET 2021 +From: Juergen Gross +Date: Mon, 29 Nov 2021 13:37:27 +0100 +Subject: xen: sync include/xen/interface/io/ring.h with Xen's newest version + +From: Juergen Gross + +commit 
629a5d87e26fe96bcaab44cbb81f5866af6f7008 upstream. + +Sync include/xen/interface/io/ring.h with Xen's newest version in +order to get the RING_COPY_RESPONSE() and RING_RESPONSE_PROD_OVERFLOW() +macros. + +Signed-off-by: Juergen Gross +Signed-off-by: Greg Kroah-Hartman +--- + include/xen/interface/io/ring.h | 271 +++++++++++++++++++--------------------- + 1 file changed, 131 insertions(+), 140 deletions(-) + +--- a/include/xen/interface/io/ring.h ++++ b/include/xen/interface/io/ring.h +@@ -24,82 +24,79 @@ typedef unsigned int RING_IDX; + * A ring contains as many entries as will fit, rounded down to the nearest + * power of two (so we can mask with (size-1) to loop around). + */ +-#define __CONST_RING_SIZE(_s, _sz) \ +- (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ +- sizeof(((struct _s##_sring *)0)->ring[0]))) +- ++#define __CONST_RING_SIZE(_s, _sz) \ ++ (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ ++ sizeof(((struct _s##_sring *)0)->ring[0]))) + /* + * The same for passing in an actual pointer instead of a name tag. + */ +-#define __RING_SIZE(_s, _sz) \ +- (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) ++#define __RING_SIZE(_s, _sz) \ ++ (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + + /* + * Macros to make the correct C datatypes for a new kind of ring. + * + * To make a new ring datatype, you need to have two message structures, +- * let's say struct request, and struct response already defined. ++ * let's say request_t, and response_t already defined. + * + * In a header where you want the ring datatype declared, you then do: + * +- * DEFINE_RING_TYPES(mytag, struct request, struct response); ++ * DEFINE_RING_TYPES(mytag, request_t, response_t); + * + * These expand out to give you a set of types, as you can see below. + * The most important of these are: + * +- * struct mytag_sring - The shared ring. +- * struct mytag_front_ring - The 'front' half of the ring. 
+- * struct mytag_back_ring - The 'back' half of the ring. ++ * mytag_sring_t - The shared ring. ++ * mytag_front_ring_t - The 'front' half of the ring. ++ * mytag_back_ring_t - The 'back' half of the ring. + * + * To initialize a ring in your code you need to know the location and size + * of the shared memory area (PAGE_SIZE, for instance). To initialise + * the front half: + * +- * struct mytag_front_ring front_ring; +- * SHARED_RING_INIT((struct mytag_sring *)shared_page); +- * FRONT_RING_INIT(&front_ring, (struct mytag_sring *)shared_page, +- * PAGE_SIZE); ++ * mytag_front_ring_t front_ring; ++ * SHARED_RING_INIT((mytag_sring_t *)shared_page); ++ * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + * + * Initializing the back follows similarly (note that only the front + * initializes the shared ring): + * +- * struct mytag_back_ring back_ring; +- * BACK_RING_INIT(&back_ring, (struct mytag_sring *)shared_page, +- * PAGE_SIZE); ++ * mytag_back_ring_t back_ring; ++ * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + */ + +-#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ +- \ +-/* Shared ring entry */ \ +-union __name##_sring_entry { \ +- __req_t req; \ +- __rsp_t rsp; \ +-}; \ +- \ +-/* Shared ring page */ \ +-struct __name##_sring { \ +- RING_IDX req_prod, req_event; \ +- RING_IDX rsp_prod, rsp_event; \ +- uint8_t pad[48]; \ +- union __name##_sring_entry ring[1]; /* variable-length */ \ +-}; \ +- \ +-/* "Front" end's private variables */ \ +-struct __name##_front_ring { \ +- RING_IDX req_prod_pvt; \ +- RING_IDX rsp_cons; \ +- unsigned int nr_ents; \ +- struct __name##_sring *sring; \ +-}; \ +- \ +-/* "Back" end's private variables */ \ +-struct __name##_back_ring { \ +- RING_IDX rsp_prod_pvt; \ +- RING_IDX req_cons; \ +- unsigned int nr_ents; \ +- struct __name##_sring *sring; \ +-}; +- ++#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ ++ \ ++/* Shared ring entry */ \ ++union __name##_sring_entry { \ ++ 
__req_t req; \ ++ __rsp_t rsp; \ ++}; \ ++ \ ++/* Shared ring page */ \ ++struct __name##_sring { \ ++ RING_IDX req_prod, req_event; \ ++ RING_IDX rsp_prod, rsp_event; \ ++ uint8_t __pad[48]; \ ++ union __name##_sring_entry ring[1]; /* variable-length */ \ ++}; \ ++ \ ++/* "Front" end's private variables */ \ ++struct __name##_front_ring { \ ++ RING_IDX req_prod_pvt; \ ++ RING_IDX rsp_cons; \ ++ unsigned int nr_ents; \ ++ struct __name##_sring *sring; \ ++}; \ ++ \ ++/* "Back" end's private variables */ \ ++struct __name##_back_ring { \ ++ RING_IDX rsp_prod_pvt; \ ++ RING_IDX req_cons; \ ++ unsigned int nr_ents; \ ++ struct __name##_sring *sring; \ ++}; \ ++ \ + /* + * Macros for manipulating rings. + * +@@ -116,105 +113,99 @@ struct __name##_back_ring { \ + */ + + /* Initialising empty rings */ +-#define SHARED_RING_INIT(_s) do { \ +- (_s)->req_prod = (_s)->rsp_prod = 0; \ +- (_s)->req_event = (_s)->rsp_event = 1; \ +- memset((_s)->pad, 0, sizeof((_s)->pad)); \ ++#define SHARED_RING_INIT(_s) do { \ ++ (_s)->req_prod = (_s)->rsp_prod = 0; \ ++ (_s)->req_event = (_s)->rsp_event = 1; \ ++ (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ + } while(0) + +-#define FRONT_RING_INIT(_r, _s, __size) do { \ +- (_r)->req_prod_pvt = 0; \ +- (_r)->rsp_cons = 0; \ +- (_r)->nr_ents = __RING_SIZE(_s, __size); \ +- (_r)->sring = (_s); \ ++#define FRONT_RING_ATTACH(_r, _s, _i, __size) do { \ ++ (_r)->req_prod_pvt = (_i); \ ++ (_r)->rsp_cons = (_i); \ ++ (_r)->nr_ents = __RING_SIZE(_s, __size); \ ++ (_r)->sring = (_s); \ + } while (0) + +-#define BACK_RING_INIT(_r, _s, __size) do { \ +- (_r)->rsp_prod_pvt = 0; \ +- (_r)->req_cons = 0; \ +- (_r)->nr_ents = __RING_SIZE(_s, __size); \ +- (_r)->sring = (_s); \ +-} while (0) ++#define FRONT_RING_INIT(_r, _s, __size) FRONT_RING_ATTACH(_r, _s, 0, __size) + +-/* Initialize to existing shared indexes -- for recovery */ +-#define FRONT_RING_ATTACH(_r, _s, __size) do { \ +- (_r)->sring = (_s); \ +- (_r)->req_prod_pvt = (_s)->req_prod; \ +- 
(_r)->rsp_cons = (_s)->rsp_prod; \ +- (_r)->nr_ents = __RING_SIZE(_s, __size); \ ++#define BACK_RING_ATTACH(_r, _s, _i, __size) do { \ ++ (_r)->rsp_prod_pvt = (_i); \ ++ (_r)->req_cons = (_i); \ ++ (_r)->nr_ents = __RING_SIZE(_s, __size); \ ++ (_r)->sring = (_s); \ + } while (0) + +-#define BACK_RING_ATTACH(_r, _s, __size) do { \ +- (_r)->sring = (_s); \ +- (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ +- (_r)->req_cons = (_s)->req_prod; \ +- (_r)->nr_ents = __RING_SIZE(_s, __size); \ +-} while (0) ++#define BACK_RING_INIT(_r, _s, __size) BACK_RING_ATTACH(_r, _s, 0, __size) + + /* How big is this ring? */ +-#define RING_SIZE(_r) \ ++#define RING_SIZE(_r) \ + ((_r)->nr_ents) + + /* Number of free requests (for use on front side only). */ +-#define RING_FREE_REQUESTS(_r) \ ++#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + + /* Test if there is an empty slot available on the front ring. + * (This is only meaningful from the front. ) + */ +-#define RING_FULL(_r) \ ++#define RING_FULL(_r) \ + (RING_FREE_REQUESTS(_r) == 0) + + /* Test if there are outstanding messages to be processed on a ring. */ +-#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ ++#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + ((_r)->sring->rsp_prod - (_r)->rsp_cons) + +-#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ +- ({ \ +- unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ +- unsigned int rsp = RING_SIZE(_r) - \ +- ((_r)->req_cons - (_r)->rsp_prod_pvt); \ +- req < rsp ? req : rsp; \ +- }) ++#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ ++ unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ ++ unsigned int rsp = RING_SIZE(_r) - \ ++ ((_r)->req_cons - (_r)->rsp_prod_pvt); \ ++ req < rsp ? req : rsp; \ ++}) + + /* Direct access to individual ring elements, by index. 
*/ +-#define RING_GET_REQUEST(_r, _idx) \ ++#define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + ++#define RING_GET_RESPONSE(_r, _idx) \ ++ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) ++ + /* +- * Get a local copy of a request. ++ * Get a local copy of a request/response. + * +- * Use this in preference to RING_GET_REQUEST() so all processing is ++ * Use this in preference to RING_GET_{REQUEST,RESPONSE}() so all processing is + * done on a local copy that cannot be modified by the other end. + * + * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this +- * to be ineffective where _req is a struct which consists of only bitfields. ++ * to be ineffective where dest is a struct which consists of only bitfields. + */ +-#define RING_COPY_REQUEST(_r, _idx, _req) do { \ +- /* Use volatile to force the copy into _req. */ \ +- *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \ ++#define RING_COPY_(type, r, idx, dest) do { \ ++ /* Use volatile to force the copy into dest. */ \ ++ *(dest) = *(volatile typeof(dest))RING_GET_##type(r, idx); \ + } while (0) + +-#define RING_GET_RESPONSE(_r, _idx) \ +- (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) ++#define RING_COPY_REQUEST(r, idx, req) RING_COPY_(REQUEST, r, idx, req) ++#define RING_COPY_RESPONSE(r, idx, rsp) RING_COPY_(RESPONSE, r, idx, rsp) + + /* Loop termination condition: Would the specified index overflow the ring? */ +-#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ ++#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) + + /* Ill-behaved frontend determination: Can there be this many requests? 
*/ +-#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ ++#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) + +- +-#define RING_PUSH_REQUESTS(_r) do { \ +- wmb(); /* back sees requests /before/ updated producer index */ \ +- (_r)->sring->req_prod = (_r)->req_prod_pvt; \ ++/* Ill-behaved backend determination: Can there be this many responses? */ ++#define RING_RESPONSE_PROD_OVERFLOW(_r, _prod) \ ++ (((_prod) - (_r)->rsp_cons) > RING_SIZE(_r)) ++ ++#define RING_PUSH_REQUESTS(_r) do { \ ++ wmb(); /* back sees requests /before/ updated producer index */ \ ++ (_r)->sring->req_prod = (_r)->req_prod_pvt; \ + } while (0) + +-#define RING_PUSH_RESPONSES(_r) do { \ +- wmb(); /* front sees responses /before/ updated producer index */ \ +- (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ ++#define RING_PUSH_RESPONSES(_r) do { \ ++ wmb(); /* front sees resps /before/ updated producer index */ \ ++ (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ + } while (0) + + /* +@@ -247,40 +238,40 @@ struct __name##_back_ring { \ + * field appropriately. 
+ */ + +-#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ +- RING_IDX __old = (_r)->sring->req_prod; \ +- RING_IDX __new = (_r)->req_prod_pvt; \ +- wmb(); /* back sees requests /before/ updated producer index */ \ +- (_r)->sring->req_prod = __new; \ +- mb(); /* back sees new requests /before/ we check req_event */ \ +- (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ +- (RING_IDX)(__new - __old)); \ +-} while (0) +- +-#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ +- RING_IDX __old = (_r)->sring->rsp_prod; \ +- RING_IDX __new = (_r)->rsp_prod_pvt; \ +- wmb(); /* front sees responses /before/ updated producer index */ \ +- (_r)->sring->rsp_prod = __new; \ +- mb(); /* front sees new responses /before/ we check rsp_event */ \ +- (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ +- (RING_IDX)(__new - __old)); \ +-} while (0) +- +-#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ +- (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +- if (_work_to_do) break; \ +- (_r)->sring->req_event = (_r)->req_cons + 1; \ +- mb(); \ +- (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +-} while (0) +- +-#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ +- (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +- if (_work_to_do) break; \ +- (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ +- mb(); \ +- (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ ++#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ ++ RING_IDX __old = (_r)->sring->req_prod; \ ++ RING_IDX __new = (_r)->req_prod_pvt; \ ++ wmb(); /* back sees requests /before/ updated producer index */ \ ++ (_r)->sring->req_prod = __new; \ ++ mb(); /* back sees new requests /before/ we check req_event */ \ ++ (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ ++ (RING_IDX)(__new - __old)); \ ++} while (0) ++ ++#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ ++ RING_IDX __old = (_r)->sring->rsp_prod; \ ++ 
RING_IDX __new = (_r)->rsp_prod_pvt; \ ++ wmb(); /* front sees resps /before/ updated producer index */ \ ++ (_r)->sring->rsp_prod = __new; \ ++ mb(); /* front sees new resps /before/ we check rsp_event */ \ ++ (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ ++ (RING_IDX)(__new - __old)); \ ++} while (0) ++ ++#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ ++ (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ ++ if (_work_to_do) break; \ ++ (_r)->sring->req_event = (_r)->req_cons + 1; \ ++ mb(); \ ++ (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ ++} while (0) ++ ++#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ ++ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ ++ if (_work_to_do) break; \ ++ (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ ++ mb(); \ ++ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + } while (0) + + #endif /* __XEN_PUBLIC_IO_RING_H__ */