From 3280d66a6363af0df0441709bc0bc302bd9a2510 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 14 Aug 2017 16:40:11 -0400
Subject: [PATCH 1/9] blk-mq: Fix queue usage on failed request allocation

blk_mq_get_request() does not release the caller's queue usage counter
when allocation fails. The caller still needs to account for its own
queue usage when it is unable to allocate a request.

Fixes: 1ad43c0078b7 ("blk-mq: don't leak preempt counter/q_usage_counter when allocating rq failed")

Reported-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Tested-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 535cbdf32aab..4603b115e234 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -360,12 +360,12 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
 		return ERR_PTR(ret);
 
 	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
+	blk_queue_exit(q);
 
 	if (!rq)
 		return ERR_PTR(-EWOULDBLOCK);
 
 	blk_mq_put_ctx(alloc_data.ctx);
-	blk_queue_exit(q);
 
 	rq->__data_len = 0;
 	rq->__sector = (sector_t) -1;
@@ -411,12 +411,11 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 	alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
 
 	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
+	blk_queue_exit(q);
 
 	if (!rq)
 		return ERR_PTR(-EWOULDBLOCK);
 
-	blk_queue_exit(q);
-
 	return rq;
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);

From 462cdace790ac2ed6aad1b19c9c0af0143b6aab0 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Tue, 18 Jul 2017 15:01:00 +0100
Subject: [PATCH 2/9] xen: fix bio vec merging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current test for bio vec merging is not fully accurate and can be
tricked into merging bios when certain grant combinations are used.
The result of these malicious bio merges is a bio that extends past
the memory page used by any of the originating bios.

Take into account the following scenario, where a guest creates two
grant references that point to the same mfn, ie: grant 1 -> mfn A,
grant 2 -> mfn A.

These references are then used in a PV block request, and mapped by
the backend domain, thus obtaining two different pfns that point to
the same mfn, pfn B -> mfn A, pfn C -> mfn A.

If those grants happen to be used in two consecutive sectors of a disk
IO operation becoming two different bios in the backend domain, the
checks in xen_biovec_phys_mergeable will succeed, because bfn1 == bfn2
(they both point to the same mfn). However due to the bio merging,
the backend domain will end up with a bio that expands past mfn A into
mfn A + 1.

Fix this by making sure the check in xen_biovec_phys_mergeable takes
into account the offset and the length of the bio; this basically
replicates what's done in __BIOVEC_PHYS_MERGEABLE using mfns (bus
addresses). While there, also remove the usage of
__BIOVEC_PHYS_MERGEABLE, since that's already checked by the callers
of xen_biovec_phys_mergeable.

CC: stable@vger.kernel.org
Reported-by: "Jan H. Schönherr" <jschoenh@amazon.de>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/xen/biomerge.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c
index 4da69dbf7dca..1bdd02a6d6ac 100644
--- a/drivers/xen/biomerge.c
+++ b/drivers/xen/biomerge.c
@@ -10,8 +10,7 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
 	unsigned long bfn1 = pfn_to_bfn(page_to_pfn(vec1->bv_page));
 	unsigned long bfn2 = pfn_to_bfn(page_to_pfn(vec2->bv_page));
 
-	return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&
-		((bfn1 == bfn2) || ((bfn1+1) == bfn2));
+	return bfn1 + PFN_DOWN(vec1->bv_offset + vec1->bv_len) == bfn2;
 #else
 	/*
 	 * XXX: Add support for merging bio_vec when using different page

From b15bd8cb37598afb2963f7eb9e2de468d2d60a2f Mon Sep 17 00:00:00 2001
From: Munehisa Kamata <kamatam@amazon.com>
Date: Wed, 9 Aug 2017 15:31:40 -0700
Subject: [PATCH 3/9] xen-blkfront: use a right index when checking requests

Since commit d05d7f40791c ("Merge branch 'for-4.8/core' of
git://git.kernel.dk/linux-block") and 3fc9d690936f ("Merge branch
'for-4.8/drivers' of git://git.kernel.dk/linux-block"), blkfront_resume()
has been using the index for iterating ring_info to check requests while
iterating blk_shadow in an inner loop. This seems to have been
accidentally introduced during the massive rewrite of the block layer
macros in those commits.

This may cause a crash like this:

[11798.057074] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048
[11798.058832] IP: [<ffffffff814411fa>] blkfront_resume+0x10a/0x610
....
[11798.061063] Call Trace:
[11798.061063]  [<ffffffff8139ce93>] xenbus_dev_resume+0x53/0x140
[11798.061063]  [<ffffffff8139ce40>] ? xenbus_dev_probe+0x150/0x150
[11798.061063]  [<ffffffff813f359e>] dpm_run_callback+0x3e/0x110
[11798.061063]  [<ffffffff813f3a08>] device_resume+0x88/0x190
[11798.061063]  [<ffffffff813f4cc0>] dpm_resume+0x100/0x2d0
[11798.061063]  [<ffffffff813f5221>] dpm_resume_end+0x11/0x20
[11798.061063]  [<ffffffff813950a8>] do_suspend+0xe8/0x1a0
[11798.061063]  [<ffffffff813954bd>] shutdown_handler+0xfd/0x130
[11798.061063]  [<ffffffff8139aba0>] ? split+0x110/0x110
[11798.061063]  [<ffffffff8139ac26>] xenwatch_thread+0x86/0x120
[11798.061063]  [<ffffffff810b4570>] ? prepare_to_wait_event+0x110/0x110
[11798.061063]  [<ffffffff8108fe57>] kthread+0xd7/0xf0
[11798.061063]  [<ffffffff811da811>] ? kfree+0x121/0x170
[11798.061063]  [<ffffffff8108fd80>] ? kthread_park+0x60/0x60
[11798.061063]  [<ffffffff810863b0>] ?  call_usermodehelper_exec_work+0xb0/0xb0
[11798.061063]  [<ffffffff810864ea>] ?  call_usermodehelper_exec_async+0x13a/0x140
[11798.061063]  [<ffffffff81534a45>] ret_from_fork+0x25/0x30

Use the right index in the inner loop.

Fixes: d05d7f40791c ("Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block")
Fixes: 3fc9d690936f ("Merge branch 'for-4.8/drivers' of git://git.kernel.dk/linux-block")
Signed-off-by: Munehisa Kamata <kamatam@amazon.com>
Reviewed-by: Thomas Friebel <friebelt@amazon.de>
Reviewed-by: Eduardo Valentin <eduval@amazon.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Roger Pau Monne <roger.pau@citrix.com>
Cc: xen-devel@lists.xenproject.org
Cc: stable@vger.kernel.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkfront.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 98e34e4c62b8..2468c28d4771 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2075,9 +2075,9 @@ static int blkfront_resume(struct xenbus_device *dev)
 			/*
 			 * Get the bios in the request so we can re-queue them.
 			 */
-			if (req_op(shadow[i].request) == REQ_OP_FLUSH ||
-			    req_op(shadow[i].request) == REQ_OP_DISCARD ||
-			    req_op(shadow[i].request) == REQ_OP_SECURE_ERASE ||
+			if (req_op(shadow[j].request) == REQ_OP_FLUSH ||
+			    req_op(shadow[j].request) == REQ_OP_DISCARD ||
+			    req_op(shadow[j].request) == REQ_OP_SECURE_ERASE ||
 			    shadow[j].request->cmd_flags & REQ_FUA) {
 				/*
 				 * Flush operations don't contain bios, so

From 42819eb7a0957cc340ad4ed8bba736bab5ebc464 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Mon, 14 Aug 2017 22:12:37 +0200
Subject: [PATCH 4/9] nvmet: don't overwrite identify sn/fr with 0-bytes

The merged version of my patch "nvmet: don't report 0-bytes in serial
number" fails to remove two lines which should have been replaced,
so that the space-padded strings are overwritten again with 0-bytes.
Fix it.

Fixes: 42de82a8b544 ("nvmet: don't report 0-bytes in serial number")
Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 2d7a98ab53fb..a53bb6635b83 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -199,12 +199,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1);
 	copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE));
 
-	memset(id->mn, ' ', sizeof(id->mn));
-	strncpy((char *)id->mn, "Linux", sizeof(id->mn));
-
-	memset(id->fr, ' ', sizeof(id->fr));
-	strncpy((char *)id->fr, UTS_RELEASE, sizeof(id->fr));
-
 	id->rab = 6;
 
 	/*

From 16a5a480f067f945fd27bf91ffdce3f959b0d4b6 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 14 Aug 2017 11:20:32 -0700
Subject: [PATCH 5/9] nvmet-fc: correct use after free on list teardown

Use list_for_each_entry_safe so that the list walk does not reference
the next pointer of an entry after it has been list_del'ed and freed.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 1b7f2520a20d..b200f9aadd52 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -704,7 +704,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 {
 	struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport;
 	struct nvmet_fc_fcp_iod *fod = queue->fod;
-	struct nvmet_fc_defer_fcp_req *deferfcp;
+	struct nvmet_fc_defer_fcp_req *deferfcp, *tempptr;
 	unsigned long flags;
 	int i, writedataactive;
 	bool disconnect;
@@ -735,7 +735,8 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 	}
 
 	/* Cleanup defer'ed IOs in queue */
-	list_for_each_entry(deferfcp, &queue->avail_defer_list, req_list) {
+	list_for_each_entry_safe(deferfcp, tempptr, &queue->avail_defer_list,
+				req_list) {
 		list_del(&deferfcp->req_list);
 		kfree(deferfcp);
 	}

From 369157b41cca435442cf5add9df209aaf951860d Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Wed, 16 Aug 2017 10:47:03 -0700
Subject: [PATCH 6/9] nvmet-fc: eliminate incorrect static markers on local
 variables

There were 2 statics introduced that were bogus. Removed the static
designations.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index b200f9aadd52..309c84aa7595 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -394,7 +394,7 @@ nvmet_fc_free_ls_iodlist(struct nvmet_fc_tgtport *tgtport)
 static struct nvmet_fc_ls_iod *
 nvmet_fc_alloc_ls_iod(struct nvmet_fc_tgtport *tgtport)
 {
-	static struct nvmet_fc_ls_iod *iod;
+	struct nvmet_fc_ls_iod *iod;
 	unsigned long flags;
 
 	spin_lock_irqsave(&tgtport->lock, flags);
@@ -471,7 +471,7 @@ nvmet_fc_destroy_fcp_iodlist(struct nvmet_fc_tgtport *tgtport,
 static struct nvmet_fc_fcp_iod *
 nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
 {
-	static struct nvmet_fc_fcp_iod *fod;
+	struct nvmet_fc_fcp_iod *fod;
 
 	lockdep_assert_held(&queue->qlock);
 

From 81a0b8d74edd5841be29d223ce44bc8db2b00d09 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Aug 2017 13:57:49 +0200
Subject: [PATCH 7/9] nvme-fabrics: fix reporting of unrecognized options

Only print the specified options that are not recognized, instead
of the whole list of options.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
---
 drivers/nvme/host/fabrics.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 2e582a240943..5f5cd306f76d 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -794,7 +794,8 @@ static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts,
 		int i;
 
 		for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
-			if (opt_tokens[i].token & ~allowed_opts) {
+			if ((opt_tokens[i].token & opts->mask) &&
+			    (opt_tokens[i].token & ~allowed_opts)) {
 				pr_warn("invalid parameter '%s'\n",
 					opt_tokens[i].pattern);
 			}

From e9d8a0fdeacd843c85dcef480cdb2ab76bcdb6e4 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 17 Aug 2017 16:45:06 -0400
Subject: [PATCH 8/9] nvme-pci: set cqe_seen on polled completions

Fixes: 920d13a884 ("nvme-pci: factor out the cqe reading mechanics from __nvme_process_cq")
Reported-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 74a124a06264..925467b31a33 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -801,6 +801,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
 		return;
 	}
 
+	nvmeq->cqe_seen = 1;
 	req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
 	nvme_end_request(req, cqe->status, cqe->result);
 }
@@ -830,10 +831,8 @@ static void nvme_process_cq(struct nvme_queue *nvmeq)
 		consumed++;
 	}
 
-	if (consumed) {
+	if (consumed)
 		nvme_ring_cq_doorbell(nvmeq);
-		nvmeq->cqe_seen = 1;
-	}
 }
 
 static irqreturn_t nvme_irq(int irq, void *data)

From c005390374957baacbc38eef96ea360559510aa7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Aug 2017 12:24:47 +0200
Subject: [PATCH 9/9] blk-mq-pci: add a fallback when pci_irq_get_affinity
 returns NULL

While pci_irq_get_affinity should never fail for SMP kernels that
implement the affinity mapping, it will always return NULL in the
UP case, so provide a fallback mapping of all queues to CPU 0 in
that case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-pci.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c
index 0c3354cf3552..76944e3271bf 100644
--- a/block/blk-mq-pci.c
+++ b/block/blk-mq-pci.c
@@ -36,12 +36,18 @@ int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev)
 	for (queue = 0; queue < set->nr_hw_queues; queue++) {
 		mask = pci_irq_get_affinity(pdev, queue);
 		if (!mask)
-			return -EINVAL;
+			goto fallback;
 
 		for_each_cpu(cpu, mask)
 			set->mq_map[cpu] = queue;
 	}
 
 	return 0;
+
+fallback:
+	WARN_ON_ONCE(set->nr_hw_queues > 1);
+	for_each_possible_cpu(cpu)
+		set->mq_map[cpu] = 0;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues);