LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: <Mario.Limonciello@dell.com>
To: <keith.busch@intel.com>, <hch@lst.de>, <sagi@grimberg.me>,
	<linux-nvme@lists.infradead.org>
Cc: <rafael@kernel.org>, <linux-kernel@vger.kernel.org>,
	<linux-pm@vger.kernel.org>, <kai.heng.feng@canonical.com>
Subject: RE: [PATCH] nvme/pci: Use host managed power state for suspend
Date: Sat, 11 May 2019 00:52:02 +0000	[thread overview]
Message-ID: <0080aaff18e5445dabca509d4113eca8@AUSX13MPC105.AMER.DELL.COM> (raw)
In-Reply-To: <20190510212937.11661-1-keith.busch@intel.com>

> 
> Cc: Mario Limonciello <Mario.Limonciello@dell.com>
> Cc: Kai Heng Feng <kai.heng.feng@canonical.com>
> Signed-off-by: Keith Busch <keith.busch@intel.com>
> ---
> Disclaimer: I've tested only on emulation faking support for the feature.

Thanks for sharing.  I'll arrange some testing with this with storage partners early 
next week.

> 
> General question: different devices potentially have divergent values for power
> consumption and transition latencies. Would it be useful to allow a user tunable
> setting to select the desired target power state instead of assuming the lowest
> one?

Since this action only happens on the way down to suspend to idle I don't think
there is a lot of value in configuring this to be user tunable which state is used.

If you don't go into the deepest state for NVME, at least on Intel the PCH generally
won't be able to go into its deepest state either.

> 
>  drivers/nvme/host/core.c | 27 ++++++++++++++++++++++++
> drivers/nvme/host/nvme.h |  2 ++  drivers/nvme/host/pci.c  | 53
> ++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 82 insertions(+)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index
> a6644a2c3ef7..eb3640fd8838 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1132,6 +1132,33 @@ static int nvme_set_features(struct nvme_ctrl *dev,
> unsigned fid, unsigned dword
>  	return ret;
>  }
> 
> +int nvme_set_power(struct nvme_ctrl *ctrl, unsigned ps) {
> +	return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL,
> 0,
> +NULL); } EXPORT_SYMBOL_GPL(nvme_set_power);
> +
> +int nvme_get_power(struct nvme_ctrl *ctrl, u32 *result) {
> +	struct nvme_command c;
> +	union nvme_result res;
> +	int ret;
> +
> +	if (!result)
> +		return -EINVAL;
> +
> +	memset(&c, 0, sizeof(c));
> +	c.features.opcode = nvme_admin_get_features;
> +	c.features.fid = cpu_to_le32(NVME_FEAT_POWER_MGMT);
> +
> +	ret = __nvme_submit_sync_cmd(ctrl->admin_q, &c, &res,
> +			NULL, 0, 0, NVME_QID_ANY, 0, 0, false);
> +	if (ret >= 0)
> +		*result = le32_to_cpu(res.u32);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(nvme_get_power);
> +
>  int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)  {
>  	u32 q_count = (*count - 1) | ((*count - 1) << 16); diff --git
> a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index
> 5ee75b5ff83f..eaa571ac06d2 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -459,6 +459,8 @@ int __nvme_submit_sync_cmd(struct request_queue *q,
> struct nvme_command *cmd,
>  		unsigned timeout, int qid, int at_head,
>  		blk_mq_req_flags_t flags, bool poll);  int
> nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
> +int nvme_set_power(struct nvme_ctrl *ctrl, unsigned ps); int
> +nvme_get_power(struct nvme_ctrl *ctrl, u32 *result);
>  void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);  int nvme_reset_ctrl(struct
> nvme_ctrl *ctrl);  int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); diff --git
> a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index
> 3e4fb891a95a..0d5d91e5b293 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -18,6 +18,7 @@
>  #include <linux/mutex.h>
>  #include <linux/once.h>
>  #include <linux/pci.h>
> +#include <linux/suspend.h>
>  #include <linux/t10-pi.h>
>  #include <linux/types.h>
>  #include <linux/io-64-nonatomic-lo-hi.h> @@ -116,6 +117,7 @@ struct
> nvme_dev {
>  	u32 cmbsz;
>  	u32 cmbloc;
>  	struct nvme_ctrl ctrl;
> +	u32 last_ps;
> 
>  	mempool_t *iod_mempool;
> 
> @@ -2828,11 +2830,59 @@ static void nvme_remove(struct pci_dev *pdev)  }
> 
>  #ifdef CONFIG_PM_SLEEP
> +static int nvme_deep_state(struct nvme_dev *dev) {
> +	struct pci_dev *pdev = to_pci_dev(dev->dev);
> +	int ret;
> +
> +	/*
> +	 * Save the current power state in case a user tool set a power policy
> +	 * for this device. We'll restore that state on resume.
> +	 */
> +	dev->last_ps = 0;
> +	ret = nvme_get_power(&dev->ctrl, &dev->last_ps);
> +
> +	/*
> +	 * Return the error to halt suspend if the driver either couldn't
> +	 * submit a command or didn't see a response.
> +	 */
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = nvme_set_power(&dev->ctrl, dev->ctrl.npss);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (!ret) {
> +		/*
> +		 * A saved state prevents pci pm from generically controlling
> +		 * the device's power. We're using protocol specific settings
> +		 * so we don't want pci interfering.
> +		 */
> +		pci_save_state(pdev);
> +	} else {
> +		/*
> +		 * The drive failed the low power request. Fallback to device
> +		 * shutdown and clear npss to force a controller reset on
> +		 * resume. The value will be rediscovered during reset.
> +		 */
> +		dev->ctrl.npss = 0;
> +		nvme_dev_disable(dev, true);
> +	}
> +	return 0;
> +}
> +
>  static int nvme_suspend(struct device *dev)  {
>  	struct pci_dev *pdev = to_pci_dev(dev);
>  	struct nvme_dev *ndev = pci_get_drvdata(pdev);
> 
> +	/*
> +	 * Try to use nvme if the device supports host managed power settings
> +	 * and platform firmware is not involved.
> +	 */
> +	if (!pm_suspend_via_firmware() && ndev->ctrl.npss)
> +		return nvme_deep_state(ndev);
>  	nvme_dev_disable(ndev, true);
>  	return 0;
>  }
> @@ -2842,6 +2892,9 @@ static int nvme_resume(struct device *dev)
>  	struct pci_dev *pdev = to_pci_dev(dev);
>  	struct nvme_dev *ndev = pci_get_drvdata(pdev);
> 
> +	if (!pm_resume_via_firmware() && ndev->ctrl.npss)
> +		if (nvme_set_power(&ndev->ctrl, ndev->last_ps) == 0)
> +			return 0;
>  	nvme_reset_ctrl(&ndev->ctrl);
>  	return 0;
>  }
> --
> 2.14.4


  reply	other threads:[~2019-05-11  0:52 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-10 21:29 Keith Busch
2019-05-11  0:52 ` Mario.Limonciello [this message]
2019-05-13 14:24   ` Mario.Limonciello
2019-05-13 14:37     ` Christoph Hellwig
2019-05-13 14:43       ` Mario.Limonciello
2019-05-13 14:54         ` Christoph Hellwig
2019-05-13 14:55         ` Keith Busch
2019-05-13 15:05           ` Mario.Limonciello
2019-05-13 15:04             ` Keith Busch
2019-05-14  8:04               ` Rafael J. Wysocki
2019-05-14 22:16                 ` Keith Busch
2019-05-15  9:00                   ` Rafael J. Wysocki
2019-05-13 14:37     ` Keith Busch
2019-05-13 14:54       ` Mario.Limonciello
2019-05-13 14:57         ` Christoph Hellwig
2019-05-13 15:16           ` Keith Busch
2019-05-13 17:16             ` Kai-Heng Feng
2019-05-13 18:16               ` Keith Busch
2019-05-13 18:01           ` Mario.Limonciello
2019-05-14  6:11             ` Christoph Hellwig
2019-05-11  1:33 ` Edmund Nadolski
2019-05-11  7:22 ` Christoph Hellwig
2019-05-12 14:34   ` Sagi Grimberg
2019-05-13 13:36   ` Keith Busch
2019-05-12  6:06 ` Chaitanya Kulkarni
2019-05-12 14:30   ` Minwoo Im
2019-05-12 15:20     ` Chaitanya Kulkarni
2019-05-13 13:47   ` Keith Busch

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0080aaff18e5445dabca509d4113eca8@AUSX13MPC105.AMER.DELL.COM \
    --to=mario.limonciello@dell.com \
    --cc=hch@lst.de \
    --cc=kai.heng.feng@canonical.com \
    --cc=keith.busch@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=rafael@kernel.org \
    --cc=sagi@grimberg.me \
    --subject='RE: [PATCH] nvme/pci: Use host managed power state for suspend' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).