From cac9e4418f4cbd548ccb065b3adcafe073f7f7d2 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Mon, 12 Jun 2023 13:51:36 -0600
Subject: [PATCH 1/2] io_uring/net: save msghdr->msg_control for retries

If the application sets ->msg_control and we have to later retry this
command, or if it got queued with IOSQE_ASYNC to begin with, then we
need to retain the original msg_control value. This is due to the net
stack overwriting this field with an in-kernel pointer, to copy it
in. Hitting that path for the second time will now fail the copy from
user, as it's attempting to copy from a non-user address.

Cc: stable@vger.kernel.org # 5.10+
Link: https://github.com/axboe/liburing/issues/880
Reported-and-tested-by: Marek Majkowski
Signed-off-by: Jens Axboe
---
 io_uring/net.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index 89e839013837..51b0f7fbb4f5 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -65,6 +65,7 @@ struct io_sr_msg {
 	u16				addr_len;
 	u16				buf_group;
 	void __user			*addr;
+	void __user			*msg_control;
 	/* used only for send zerocopy */
 	struct io_kiocb			*notif;
 };
@@ -195,11 +196,15 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
 			       struct io_async_msghdr *iomsg)
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+	int ret;
 
 	iomsg->msg.msg_name = &iomsg->addr;
 	iomsg->free_iov = iomsg->fast_iov;
-	return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
+	ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
 					&iomsg->free_iov);
+	/* save msg_control as sys_sendmsg() overwrites it */
+	sr->msg_control = iomsg->msg.msg_control;
+	return ret;
 }
 
 int io_send_prep_async(struct io_kiocb *req)
@@ -297,6 +302,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (req_has_async_data(req)) {
 		kmsg = req->async_data;
+		kmsg->msg.msg_control = sr->msg_control;
 	} else {
 		ret = io_sendmsg_copy_hdr(req, &iomsg);
 		if (ret)

From adeaa3f290ecf7f6a6a5c53219a4686cbdff5fbd Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 13 Jun 2023 19:26:55 -0600
Subject: [PATCH 2/2] io_uring/io-wq: clear current->worker_private on exit

A recent fix stopped clearing PF_IO_WORKER from current->flags on exit,
which meant that we can now call inc/dec running on the worker after it
has been removed if it ends up scheduling in/out as part of exit.

If this happens after an RCU grace period has passed, then the struct
pointed to by current->worker_private may have been freed, and we can
now be accessing memory that is freed.

Ensure this doesn't happen by clearing the task worker_private field.
Both io_wq_worker_running() and io_wq_worker_sleeping() check this
field before going any further, and we don't need any accounting etc
done after this worker has exited.

Fixes: fd37b884003c ("io_uring/io-wq: don't clear PF_IO_WORKER on exit")
Reported-by: Zorro Lang
Signed-off-by: Jens Axboe
---
 io_uring/io-wq.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index fe38eb0cbc82..399e9a15c38d 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -220,7 +220,12 @@ static void io_worker_exit(struct io_worker *worker)
 	list_del_rcu(&worker->all_list);
 	raw_spin_unlock(&wq->lock);
 	io_wq_dec_running(worker);
-	worker->flags = 0;
+	/*
+	 * this worker is a goner, clear ->worker_private to avoid any
+	 * inc/dec running calls that could happen as part of exit from
+	 * touching 'worker'.
+	 */
+	current->worker_private = NULL;
 
 	kfree_rcu(worker, rcu);
 	io_worker_ref_put(wq);