From 6aacd1484468361d1d04badfe75f264fa5314864 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 29 Oct 2024 16:46:12 +0800 Subject: [PATCH 1/4] virtio-net: fix overflow inside virtnet_rq_alloc When the frag just got a page, then may lead to regression on VM. Specially if the sysctl net.core.high_order_alloc_disable value is 1, then the frag always get a page when do refill. Which could see reliable crashes or scp failure (scp a file 100M in size to VM). The issue is that the virtnet_rq_dma takes up 16 bytes at the beginning of a new frag. When the frag size is larger than PAGE_SIZE, everything is fine. However, if the frag is only one page and the total size of the buffer and virtnet_rq_dma is larger than one page, an overflow may occur. The commit f9dac92ba908 ("virtio_ring: enable premapped mode whatever use_dma_api") introduced this problem. And we reverted some commits to fix this in last linux version. Now we try to enable it and fix this bug directly. Here, when the frag size is not enough, we reduce the buffer len to fix this problem. Reported-by: "Si-Wei Liu" Tested-by: Darren Kenny Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 869586c17ffd..3ff063fff443 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -926,9 +926,6 @@ static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) void *buf, *head; dma_addr_t addr; - if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp))) - return NULL; - head = page_address(alloc_frag->page); if (rq->do_dma) { @@ -2423,6 +2420,9 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, len = SKB_DATA_ALIGN(len) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp))) + return -ENOMEM; + buf = virtnet_rq_alloc(rq, len, gfp); if (unlikely(!buf)) return -ENOMEM; @@ -2525,6 +2525,12 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, */ len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); + if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) + return -ENOMEM; + + if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size) + len -= sizeof(struct virtnet_rq_dma); + buf = virtnet_rq_alloc(rq, len + room, gfp); if (unlikely(!buf)) return -ENOMEM; From a33f3df850750216f432b637a5020ad6a740cac1 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 29 Oct 2024 16:46:13 +0800 Subject: [PATCH 2/4] virtio_net: big mode skip the unmap check The virtio-net big mode did not enable premapped mode, so we did not need to check the unmap. And the subsequent commit will remove the failover code for failing enable premapped for merge and small mode. So we need to remove the checking do_dma code in the big mode path. Tested-by: Darren Kenny Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3ff063fff443..b82f6b8a8231 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -987,7 +987,7 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) return; } - if (rq->do_dma) + if (!vi->big_packets || vi->mergeable_rx_bufs) virtnet_rq_unmap(rq, buf, 0); virtnet_rq_free_buf(vi, rq, buf); @@ -2712,7 +2712,7 @@ static int virtnet_receive_packets(struct virtnet_info *vi, } } else { while (packets < budget && - (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) { + (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); packets++; } From 47008bb51c3e11c187dacc17af334bdca97c2dca Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 29 Oct 2024 16:46:14 +0800 Subject: [PATCH 3/4] virtio_net: enable premapped mode for merge and small by default Currently, the virtio core will perform a dma operation for each buffer. Although, the same page may be operated multiple times. In premapped mod, we can perform only one dma operation for the pages of the alloc frag. This is beneficial for the iommu device. kernel command line: intel_iommu=on iommu.passthrough=0 | strict=0 | strict=1 Before | 775496pps | 428614pps After | 1109316pps | 742853pps In the 6.11, we disabled this feature because a regress [1]. Now, we fix the problem and re-enable it. [1]: http://lore.kernel.org/all/8b20cc28-45a9-4643-8e87-ba164a540c0a@oracle.com Tested-by: Darren Kenny Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b82f6b8a8231..befc30b3abb1 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -6107,6 +6107,17 @@ err_ctrl: return -ENOMEM; } +static void virtnet_rq_set_premapped(struct virtnet_info *vi) +{ + int i; + + for (i = 0; i < vi->max_queue_pairs; i++) { + /* error should never happen */ + BUG_ON(virtqueue_set_dma_premapped(vi->rq[i].vq)); + vi->rq[i].do_dma = true; + } +} + static int init_vqs(struct virtnet_info *vi) { int ret; @@ -6120,6 +6131,10 @@ static int init_vqs(struct virtnet_info *vi) if (ret) goto err_free; + /* disable for big mode */ + if (!vi->big_packets || vi->mergeable_rx_bufs) + virtnet_rq_set_premapped(vi); + cpus_read_lock(); virtnet_set_affinity(vi); cpus_read_unlock(); From fb22437c1ba37fc54eff949023923cc7887aaaa1 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 29 Oct 2024 16:46:15 +0800 Subject: [PATCH 4/4] virtio_net: rx remove premapped failover code Now, the premapped mode can be enabled unconditionally. So we can remove the failover code for merge and small mode. The virtnet_rq_xxx() helper would be only used if the mode is using pre mapping. A check is added to prevent misusing of these API. Tested-by: Darren Kenny Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 94 ++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 48 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index befc30b3abb1..4b27ded8fc16 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -356,9 +356,6 @@ struct receive_queue { struct xdp_rxq_info xsk_rxq_info; struct xdp_buff **xsk_buffs; - - /* Do dma by self */ - bool do_dma; }; /* This structure can contain rss message with maximum settings for indirection table and keysize @@ -856,11 +853,14 @@ ok: static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) { + struct virtnet_info *vi = rq->vq->vdev->priv; struct page *page = virt_to_head_page(buf); struct virtnet_rq_dma *dma; void *head; int offset; + BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); + head = page_address(page); dma = head; @@ -885,10 +885,13 @@ static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) { + struct virtnet_info *vi = rq->vq->vdev->priv; void *buf; + BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); + buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); - if (buf && rq->do_dma) + if (buf) virtnet_rq_unmap(rq, buf, *len); return buf; @@ -896,15 +899,13 @@ static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) { + struct virtnet_info *vi = rq->vq->vdev->priv; struct virtnet_rq_dma *dma; dma_addr_t addr; u32 offset; void *head; - if (!rq->do_dma) { - sg_init_one(rq->sg, buf, len); - return; - } + BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); head = page_address(rq->alloc_frag.page); @@ -922,50 +923,51 @@ static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) { struct page_frag *alloc_frag = &rq->alloc_frag; + struct virtnet_info *vi = rq->vq->vdev->priv; struct virtnet_rq_dma *dma; void *buf, *head; dma_addr_t addr; + BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); + head = page_address(alloc_frag->page); - if (rq->do_dma) { - dma = head; + dma = head; - /* new pages */ - if (!alloc_frag->offset) { - if (rq->last_dma) { - /* Now, the new page is allocated, the last dma - * will not be used. So the dma can be unmapped - * if the ref is 0. - */ - virtnet_rq_unmap(rq, rq->last_dma, 0); - rq->last_dma = NULL; - } - - dma->len = alloc_frag->size - sizeof(*dma); - - addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, - dma->len, DMA_FROM_DEVICE, 0); - if (virtqueue_dma_mapping_error(rq->vq, addr)) - return NULL; - - dma->addr = addr; - dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); - - /* Add a reference to dma to prevent the entire dma from - * being released during error handling. This reference - * will be freed after the pages are no longer used. + /* new pages */ + if (!alloc_frag->offset) { + if (rq->last_dma) { + /* Now, the new page is allocated, the last dma + * will not be used. So the dma can be unmapped + * if the ref is 0. */ - get_page(alloc_frag->page); - dma->ref = 1; - alloc_frag->offset = sizeof(*dma); - - rq->last_dma = dma; + virtnet_rq_unmap(rq, rq->last_dma, 0); + rq->last_dma = NULL; } - ++dma->ref; + dma->len = alloc_frag->size - sizeof(*dma); + + addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, + dma->len, DMA_FROM_DEVICE, 0); + if (virtqueue_dma_mapping_error(rq->vq, addr)) + return NULL; + + dma->addr = addr; + dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); + + /* Add a reference to dma to prevent the entire dma from + * being released during error handling. This reference + * will be freed after the pages are no longer used. + */ + get_page(alloc_frag->page); + dma->ref = 1; + alloc_frag->offset = sizeof(*dma); + + rq->last_dma = dma; } + ++dma->ref; + buf = head + alloc_frag->offset; get_page(alloc_frag->page); @@ -2433,8 +2435,7 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); if (err < 0) { - if (rq->do_dma) - virtnet_rq_unmap(rq, buf, 0); + virtnet_rq_unmap(rq, buf, 0); put_page(virt_to_head_page(buf)); } @@ -2554,8 +2555,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, ctx = mergeable_len_to_ctx(len + room, headroom); err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); if (err < 0) { - if (rq->do_dma) - virtnet_rq_unmap(rq, buf, 0); + virtnet_rq_unmap(rq, buf, 0); put_page(virt_to_head_page(buf)); } @@ -5922,7 +5922,7 @@ static void free_receive_page_frags(struct virtnet_info *vi) int i; for (i = 0; i < vi->max_queue_pairs; i++) if (vi->rq[i].alloc_frag.page) { - if (vi->rq[i].do_dma && vi->rq[i].last_dma) + if (vi->rq[i].last_dma) virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); put_page(vi->rq[i].alloc_frag.page); } @@ -6111,11 +6111,9 @@ static void virtnet_rq_set_premapped(struct virtnet_info *vi) { int i; - for (i = 0; i < vi->max_queue_pairs; i++) { + for (i = 0; i < vi->max_queue_pairs; i++) /* error should never happen */ BUG_ON(virtqueue_set_dma_premapped(vi->rq[i].vq)); - vi->rq[i].do_dma = true; - } } static int init_vqs(struct virtnet_info *vi)