Discussion:
[PATCH 4/4] nvme/pci: switch to pci_request_irq
(too old to reply)
Christoph Hellwig
2017-04-13 07:10:02 UTC
Permalink
Signed-off-by: Christoph Hellwig <***@lst.de>
---
drivers/nvme/host/pci.c | 30 +++++++++++++-----------------
1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 26a5fd05fe88..925997127a6b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -117,7 +117,6 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
struct nvme_queue {
struct device *q_dmadev;
struct nvme_dev *dev;
- char irqname[24]; /* nvme4294967295-65535\0 */
spinlock_t q_lock;
struct nvme_command *sq_cmds;
struct nvme_command __iomem *sq_cmds_io;
@@ -204,11 +203,6 @@ static unsigned int nvme_cmd_size(struct nvme_dev *dev)
nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES);
}

-static int nvmeq_irq(struct nvme_queue *nvmeq)
-{
- return pci_irq_vector(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector);
-}
-
static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
@@ -962,7 +956,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
spin_unlock_irq(&nvmeq->q_lock);
return 1;
}
- vector = nvmeq_irq(nvmeq);
+ vector = nvmeq->cq_vector;
nvmeq->dev->online_queues--;
nvmeq->cq_vector = -1;
spin_unlock_irq(&nvmeq->q_lock);
@@ -970,7 +964,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);

- free_irq(vector, nvmeq);
+ pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq);

return 0;
}
@@ -1055,8 +1049,6 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,

nvmeq->q_dmadev = dev->dev;
nvmeq->dev = dev;
- snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
- dev->ctrl.instance, qid);
spin_lock_init(&nvmeq->q_lock);
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
@@ -1079,12 +1071,16 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,

static int queue_request_irq(struct nvme_queue *nvmeq)
{
- if (use_threaded_interrupts)
- return request_threaded_irq(nvmeq_irq(nvmeq), nvme_irq_check,
- nvme_irq, IRQF_SHARED, nvmeq->irqname, nvmeq);
- else
- return request_irq(nvmeq_irq(nvmeq), nvme_irq, IRQF_SHARED,
- nvmeq->irqname, nvmeq);
+ struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev);
+ int nr = nvmeq->dev->ctrl.instance;
+
+ if (use_threaded_interrupts) {
+ return pci_request_irq(pdev, nvmeq->cq_vector, nvme_irq_check,
+ nvme_irq, nvmeq, "nvme%dq%d", nr, nvmeq->qid);
+ } else {
+ return pci_request_irq(pdev, nvmeq->cq_vector, nvme_irq,
+ NULL, nvmeq, "nvme%dq%d", nr, nvmeq->qid);
+ }
}

static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
@@ -1440,7 +1436,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
}

/* Deregister the admin queue's interrupt */
- free_irq(pci_irq_vector(pdev, 0), adminq);
+ pci_free_irq(pdev, 0, adminq);

/*
* If we enable msix early due to not intx, disable it again before
--
2.11.0
Christoph Hellwig
2017-04-13 07:10:02 UTC
Permalink
Signed-off-by: Christoph Hellwig <***@lst.de>
---
kernel/irq/manage.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a4afe5cc5af1..391cb738b2db 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1557,7 +1557,7 @@ void remove_irq(unsigned int irq, struct irqaction *act)
struct irq_desc *desc = irq_to_desc(irq);

if (desc && !WARN_ON(irq_settings_is_per_cpu_devid(desc)))
- __free_irq(irq, act->dev_id);
+ __free_irq(irq, act->dev_id);
}
EXPORT_SYMBOL_GPL(remove_irq);
--
2.11.0
Thomas Gleixner
2017-04-14 17:30:01 UTC
Permalink
Reviewed-by: Thomas Gleixner <***@linutronix.de>
Sagi Grimberg
2017-04-20 13:30:02 UTC
Permalink
Reviewed-by: Sagi Grimberg <***@grimberg.me>
Christoph Hellwig
2017-04-13 07:10:02 UTC
Permalink
These are small wrappers around request_threaded_irq and free_irq,
which dynamically allocate space for the device name so that drivers
don't need to keep static buffers for these around. Additionally it
works with device-relative vector numbers to make the usage easier,
and force the IRQF_SHARED flag on given that it has no runtime overhead
and should be supported by all PCI devices.

Signed-off-by: Christoph Hellwig <***@lst.de>
---
drivers/pci/irq.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
include/linux/pci.h | 6 ++++++
2 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c
index 6684f153ab57..53e7f30d9461 100644
--- a/drivers/pci/irq.c
+++ b/drivers/pci/irq.c
@@ -1,7 +1,8 @@
/*
- * PCI IRQ failure handing code
+ * PCI IRQ handing code
*
* Copyright (c) 2008 James Bottomley <***@HansenPartnership.com>
+ * Copyright (C) 2017 Christoph Hellwig.
*/

#include <linux/acpi.h>
@@ -59,3 +60,61 @@ enum pci_lost_interrupt_reason pci_lost_interrupt(struct pci_dev *pdev)
return PCI_LOST_IRQ_NO_INFORMATION;
}
EXPORT_SYMBOL(pci_lost_interrupt);
+
+/**
+ * pci_request_irq - allocate an interrupt line for a PCI device
+ * @dev: PCI device to operate on
+ * @nr: device-relative interrupt vector index (0-based).
+ * @handler: Function to be called when the IRQ occurs.
+ * Primary handler for threaded interrupts.
+ * If NULL and thread_fn != NULL the default primary handler is
+ * installed.
+ * @thread_fn: Function called from the irq handler thread
+ * If NULL, no irq thread is created
+ * @dev_id: Cookie passed back to the handler function
+ * @fmt: Printf-like format string naming the handler
+ *
+ * This call allocates interrupt resources and enables the interrupt line and
+ * IRQ handling. From the point this call is made @handler and @thread_fn may
+ * be invoked. All interrupt requsted using this function might be shared.
+ *
+ * @dev_id must not ne NULL and be globally unique.
+ */
+int pci_request_irq(struct pci_dev *dev, unsigned int nr, irq_handler_t handler,
+ irq_handler_t thread_fn, void *dev_id, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+ char *devname;
+
+ va_start(ap, fmt);
+ devname = kvasprintf(GFP_KERNEL, fmt, ap);
+ va_end(ap);
+
+ ret = request_threaded_irq(pci_irq_vector(dev, nr), handler, thread_fn,
+ IRQF_SHARED, devname, dev_id);
+ if (ret)
+ kfree(devname);
+ return ret;
+}
+EXPORT_SYMBOL(pci_request_irq);
+
+/**
+ * pci_free_irq - free an interrupt allocated with pci_request_irq
+ * @dev: PCI device to operate on
+ * @nr: device-relative interrupt vector index (0-based).
+ * @dev_id: Device identity to free
+ *
+ * Remove an interrupt handler. The handler is removed and if the interrupt
+ * line is no longer in use by any driver it is disabled. The caller must
+ * ensure the interrupt is disabled on the device before calling this function.
+ * The function does not return until any executing interrupts for this IRQ
+ * have completed.
+ *
+ * This function must not be called from interrupt context.
+ */
+void pci_free_irq(struct pci_dev *dev, unsigned int nr, void *dev_id)
+{
+ kfree(free_irq(pci_irq_vector(dev, nr), dev_id));
+}
+EXPORT_SYMBOL(pci_free_irq);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index eb3da1a04e6c..b23f81b583ab 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -28,6 +28,7 @@
#include <linux/kobject.h>
#include <linux/atomic.h>
#include <linux/device.h>
+#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/resource_ext.h>
#include <uapi/linux/pci.h>
@@ -1072,6 +1073,11 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags);
bool pci_device_is_present(struct pci_dev *pdev);
void pci_ignore_hotplug(struct pci_dev *dev);

+int __printf(6, 7) pci_request_irq(struct pci_dev *dev, unsigned int nr,
+ irq_handler_t handler, irq_handler_t thread_fn, void *dev_id,
+ const char *fmt, ...);
+void pci_free_irq(struct pci_dev *dev, unsigned int nr, void *dev_id);
+
/* ROM control related routines */
int pci_enable_rom(struct pci_dev *pdev);
void pci_disable_rom(struct pci_dev *pdev);
--
2.11.0
Bjorn Helgaas
2017-04-14 14:50:01 UTC
Permalink
Post by Christoph Hellwig
These are small wrappers around request_threaded_irq and free_irq,
which dynamically allocate space for the device name so that drivers
don't need to keep static buffers for these around. Additionally it
works with device-relative vector numbers to make the usage easier,
and force the IRQF_SHARED flag on given that it has no runtime overhead
and should be supported by all PCI devices.
Acked-by: Bjorn Helgaas <***@google.com>

Let me know if you'd like me to take this series (given an appropriate
ack for the genirq parts).
Post by Christoph Hellwig
---
drivers/pci/irq.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
include/linux/pci.h | 6 ++++++
2 files changed, 66 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c
index 6684f153ab57..53e7f30d9461 100644
--- a/drivers/pci/irq.c
+++ b/drivers/pci/irq.c
@@ -1,7 +1,8 @@
/*
- * PCI IRQ failure handing code
+ * PCI IRQ handing code
*
+ * Copyright (C) 2017 Christoph Hellwig.
*/
#include <linux/acpi.h>
@@ -59,3 +60,61 @@ enum pci_lost_interrupt_reason pci_lost_interrupt(struct pci_dev *pdev)
return PCI_LOST_IRQ_NO_INFORMATION;
}
EXPORT_SYMBOL(pci_lost_interrupt);
+
+/**
+ * pci_request_irq - allocate an interrupt line for a PCI device
+ * Primary handler for threaded interrupts.
+ * If NULL and thread_fn != NULL the default primary handler is
+ * installed.
+ * If NULL, no irq thread is created
+ *
+ * This call allocates interrupt resources and enables the interrupt line and
+ * be invoked. All interrupt requsted using this function might be shared.
+ *
+ */
+int pci_request_irq(struct pci_dev *dev, unsigned int nr, irq_handler_t handler,
+ irq_handler_t thread_fn, void *dev_id, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+ char *devname;
+
+ va_start(ap, fmt);
+ devname = kvasprintf(GFP_KERNEL, fmt, ap);
+ va_end(ap);
+
+ ret = request_threaded_irq(pci_irq_vector(dev, nr), handler, thread_fn,
+ IRQF_SHARED, devname, dev_id);
+ if (ret)
+ kfree(devname);
+ return ret;
+}
+EXPORT_SYMBOL(pci_request_irq);
+
+/**
+ * pci_free_irq - free an interrupt allocated with pci_request_irq
+ *
+ * Remove an interrupt handler. The handler is removed and if the interrupt
+ * line is no longer in use by any driver it is disabled. The caller must
+ * ensure the interrupt is disabled on the device before calling this function.
+ * The function does not return until any executing interrupts for this IRQ
+ * have completed.
+ *
+ * This function must not be called from interrupt context.
+ */
+void pci_free_irq(struct pci_dev *dev, unsigned int nr, void *dev_id)
+{
+ kfree(free_irq(pci_irq_vector(dev, nr), dev_id));
+}
+EXPORT_SYMBOL(pci_free_irq);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index eb3da1a04e6c..b23f81b583ab 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -28,6 +28,7 @@
#include <linux/kobject.h>
#include <linux/atomic.h>
#include <linux/device.h>
+#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/resource_ext.h>
#include <uapi/linux/pci.h>
@@ -1072,6 +1073,11 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags);
bool pci_device_is_present(struct pci_dev *pdev);
void pci_ignore_hotplug(struct pci_dev *dev);
+int __printf(6, 7) pci_request_irq(struct pci_dev *dev, unsigned int nr,
+ irq_handler_t handler, irq_handler_t thread_fn, void *dev_id,
+ const char *fmt, ...);
+void pci_free_irq(struct pci_dev *dev, unsigned int nr, void *dev_id);
+
/* ROM control related routines */
int pci_enable_rom(struct pci_dev *pdev);
void pci_disable_rom(struct pci_dev *pdev);
--
2.11.0
Christoph Hellwig
2017-04-14 16:00:02 UTC
Permalink
Post by Bjorn Helgaas
Let me know if you'd like me to take this series (given an appropriate
ack for the genirq parts).
I want to hear from Thomas on the genirq bits. But if he's fine with
it might be a good idea to take it through your tree.
Christoph Hellwig
2017-04-13 07:10:02 UTC
Permalink
This allows callers to get back at them instead of having to store
it in another variable.

Signed-off-by: Christoph Hellwig <***@lst.de>
---
include/linux/interrupt.h | 2 +-
kernel/irq/manage.c | 13 ++++++++++---
2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 53144e78a369..a6fba4804672 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -155,7 +155,7 @@ extern int __must_check
request_percpu_irq(unsigned int irq, irq_handler_t handler,
const char *devname, void __percpu *percpu_dev_id);

-extern void free_irq(unsigned int, void *);
+extern const void *free_irq(unsigned int, void *);
extern void free_percpu_irq(unsigned int, void __percpu *);

struct device;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 391cb738b2db..e688e7e06772 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1574,20 +1574,27 @@ EXPORT_SYMBOL_GPL(remove_irq);
* have completed.
*
* This function must not be called from interrupt context.
+ *
+ * Returns the devname argument passed to request_irq.
*/
-void free_irq(unsigned int irq, void *dev_id)
+const void *free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
+ struct irqaction *action;
+ const char *devname;

if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc)))
- return;
+ return NULL;

#ifdef CONFIG_SMP
if (WARN_ON(desc->affinity_notify))
desc->affinity_notify = NULL;
#endif

- kfree(__free_irq(irq, dev_id));
+ action = __free_irq(irq, dev_id);
+ devname = action->name;
+ kfree(action);
+ return devname;
}
EXPORT_SYMBOL(free_irq);
--
2.11.0
Thomas Gleixner
2017-04-14 17:30:01 UTC
Permalink
Post by Christoph Hellwig
This allows callers to get back at them instead of having to store
it in another variable.
Alternatively you tell the irq code to free it for you. Patch below.

Thanks,

tglx

8<-----------------------

--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -60,7 +60,9 @@
* IRQF_COND_SUSPEND - If the IRQ is shared with a NO_SUSPEND user, execute this
* interrupt handler after suspending interrupts. For system
* wakeup devices users need to implement wakeup detection in
- * their interrupt handlers.
+ * their interrupt handlers
+ * IRQF_FREE_NAME - Free the memory pointed to by the @name argument of
+ * request_irq() in free_irq()
*/
#define IRQF_SHARED 0x00000080
#define IRQF_PROBE_SHARED 0x00000100
@@ -74,6 +76,7 @@
#define IRQF_NO_THREAD 0x00010000
#define IRQF_EARLY_RESUME 0x00020000
#define IRQF_COND_SUSPEND 0x00040000
+#define IRQF_FREE_NAME 0x00080000

#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)

--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1542,6 +1542,8 @@ static struct irqaction *__free_irq(unsi
irq_chip_pm_put(&desc->irq_data);
module_put(desc->owner);
kfree(action->secondary);
+ if (action->flags & IRQF_FREE_NAME)
+ kfree(action->name);
return action;
}
Christoph Hellwig
2017-04-14 17:40:02 UTC
Permalink
Post by Thomas Gleixner
Post by Christoph Hellwig
This allows callers to get back at them instead of having to store
it in another variable.
Alternatively you tell the irq code to free it for you. Patch below.
Yes, this should work to. But I think having the same layer free
something they allocated again is a tad cleaner in terms of layering.

If you prefer this version I can respin the series.
Thomas Gleixner
2017-04-14 18:00:02 UTC
Permalink
Post by Christoph Hellwig
Post by Thomas Gleixner
Post by Christoph Hellwig
This allows callers to get back at them instead of having to store
it in another variable.
Alternatively you tell the irq code to free it for you. Patch below.
Yes, this should work to. But I think having the same layer free
something they allocated again is a tad cleaner in terms of layering.
Fair enough. Was just a thought.

Bjorn, please route it through PCI alltogether.

For the whole series:

Reviewed-by: Thomas Gleixner <***@linutronix.de>

Thanks,

tglx
Keith Busch
2017-04-13 20:00:02 UTC
Permalink
This is great. As an added bonus, more of struct nvme_queue's hot values
are in the same cache line!

Reviewed-by: Keith Busch <***@intel.com>
Sagi Grimberg
2017-04-20 16:30:02 UTC
Permalink
Post by Keith Busch
This is great. As an added bonus, more of struct nvme_queue's hot values
are in the same cache line!
I think I can kill off for mlx5 as well.

Very useful Christoph!

Reviewed-by: Sagi Grimberg <***@grimberg.me>

Bjorn Helgaas
2017-04-18 18:50:02 UTC
Permalink
PCI drivers that support multiple MSI or MSI-X vectors currently have a
lot of boileplate code to generate names for each vector (or simply use
the same name for all of them, which isn't nice either). This series
adds new helpers that allocate and free a name based on a format string
passed to the request_irq wrapper.
They also hide the [pci_dev,vector] to Linux irq number translation,
in fact the example NVMe driver conversion now never sees the Linux irq
number. That might be useful to isolate PCI drivers from our IRQ numbers,
although to have a full abstraction we'd need similar wrappers for
{enable,disable,synchronize}_irq as well.
Applied with reviewed-by from Thomas and Keith to pci/irq for v4.12,
thanks!
Loading...