LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH 1/2] hwmon: (k10temp) Fix reading critical temperature register
@ 2018-04-27  3:13 Guenter Roeck
  2018-04-27  3:13 ` [PATCH 2/2] hwmon: (k10temp) Display both Tctl and Tdie Guenter Roeck
  2018-04-29 14:59 ` [1/2] hwmon: (k10temp) Fix reading critical temperature register Guenter Roeck
  0 siblings, 2 replies; 5+ messages in thread
From: Guenter Roeck @ 2018-04-27  3:13 UTC (permalink / raw)
  To: Clemens Ladisch
  Cc: Jean Delvare, linux-hwmon, linux-kernel, Brian Woods, Guenter Roeck

The HTC (Hardware Temperature Control) register has moved
for recent chips.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
Compile tested only.

 drivers/hwmon/k10temp.c | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index d2cc55e21374..b06bb1f90853 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -63,10 +63,12 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
 #define  NB_CAP_HTC			0x00000400
 
 /*
- * For F15h M60h, functionality of REG_REPORTED_TEMPERATURE
- * has been moved to D0F0xBC_xD820_0CA4 [Reported Temperature
- * Control]
+ * For F15h M60h and M70h, REG_HARDWARE_THERMAL_CONTROL
+ * and REG_REPORTED_TEMPERATURE have been moved to
+ * D0F0xBC_xD820_0C64 [Hardware Temperature Control]
+ * D0F0xBC_xD820_0CA4 [Reported Temperature Control]
  */
+#define F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET	0xd8200c64
 #define F15H_M60H_REPORTED_TEMP_CTRL_OFFSET	0xd8200ca4
 
 /* F17h M01h Access througn SMN */
@@ -74,6 +76,7 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
 
 struct k10temp_data {
 	struct pci_dev *pdev;
+	void (*read_htcreg)(struct pci_dev *pdev, u32 *regval);
 	void (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
 	int temp_offset;
 	u32 temp_adjust_mask;
@@ -98,6 +101,11 @@ static const struct tctl_offset tctl_offset_table[] = {
 	{ 0x17, "AMD Ryzen Threadripper 1910", 10000 },
 };
 
+static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval)
+{
+	pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval);
+}
+
 static void read_tempreg_pci(struct pci_dev *pdev, u32 *regval)
 {
 	pci_read_config_dword(pdev, REG_REPORTED_TEMPERATURE, regval);
@@ -114,6 +122,12 @@ static void amd_nb_index_read(struct pci_dev *pdev, unsigned int devfn,
 	mutex_unlock(&nb_smu_ind_mutex);
 }
 
+static void read_htcreg_nb_f15(struct pci_dev *pdev, u32 *regval)
+{
+	amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
+			  F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET, regval);
+}
+
 static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
 {
 	amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
@@ -181,13 +195,18 @@ static umode_t k10temp_is_visible(struct kobject *kobj,
 	struct pci_dev *pdev = data->pdev;
 
 	if (index >= 2) {
-		u32 reg_caps, reg_htc;
+		u32 reg;
+
+		if (!data->read_htcreg)
+			return 0;
 
 		pci_read_config_dword(pdev, REG_NORTHBRIDGE_CAPABILITIES,
-				      &reg_caps);
-		pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL,
-				      &reg_htc);
-		if (!(reg_caps & NB_CAP_HTC) || !(reg_htc & HTC_ENABLE))
+				      &reg);
+		if (!(reg & NB_CAP_HTC))
+			return 0;
+
+		data->read_htcreg(data->pdev, &reg);
+		if (!(reg & HTC_ENABLE))
 			return 0;
 	}
 	return attr->mode;
@@ -268,11 +287,13 @@ static int k10temp_probe(struct pci_dev *pdev,
 
 	if (boot_cpu_data.x86 == 0x15 && (boot_cpu_data.x86_model == 0x60 ||
 					  boot_cpu_data.x86_model == 0x70)) {
+		data->read_htcreg = read_htcreg_nb_f15;
 		data->read_tempreg = read_tempreg_nb_f15;
 	} else if (boot_cpu_data.x86 == 0x17) {
 		data->temp_adjust_mask = 0x80000;
 		data->read_tempreg = read_tempreg_nb_f17;
 	} else {
+		data->read_htcreg = read_htcreg_pci;
 		data->read_tempreg = read_tempreg_pci;
 	}
 
-- 
2.7.4

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 2/2] hwmon: (k10temp) Display both Tctl and Tdie
  2018-04-27  3:13 [PATCH 1/2] hwmon: (k10temp) Fix reading critical temperature register Guenter Roeck
@ 2018-04-27  3:13 ` Guenter Roeck
  2018-04-29 16:13   ` Gabriel C
  2018-04-29 14:59 ` [1/2] hwmon: (k10temp) Fix reading critical temperature register Guenter Roeck
  1 sibling, 1 reply; 5+ messages in thread
From: Guenter Roeck @ 2018-04-27  3:13 UTC (permalink / raw)
  To: Clemens Ladisch
  Cc: Jean Delvare, linux-hwmon, linux-kernel, Brian Woods, Guenter Roeck

On some AMD CPUs, there is a different between the die temperature
(Tdie) and the reported temperature (Tctl). Tdie is the real measured
temperature, and Tctl is used for fan control. Lets report both for
affected CPUs.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/k10temp.c | 55 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index b06bb1f90853..1135b8f1ad0f 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -80,6 +80,7 @@ struct k10temp_data {
 	void (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
 	int temp_offset;
 	u32 temp_adjust_mask;
+	bool show_tdie;
 };
 
 struct tctl_offset {
@@ -140,17 +141,24 @@ static void read_tempreg_nb_f17(struct pci_dev *pdev, u32 *regval)
 			  F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval);
 }
 
-static ssize_t temp1_input_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
+unsigned int get_raw_temp(struct k10temp_data *data)
 {
-	struct k10temp_data *data = dev_get_drvdata(dev);
-	u32 regval;
 	unsigned int temp;
+	u32 regval;
 
 	data->read_tempreg(data->pdev, &regval);
 	temp = (regval >> 21) * 125;
 	if (regval & data->temp_adjust_mask)
 		temp -= 49000;
+	return temp;
+}
+
+static ssize_t temp1_input_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct k10temp_data *data = dev_get_drvdata(dev);
+	unsigned int temp = get_raw_temp(data);
+
 	if (temp > data->temp_offset)
 		temp -= data->temp_offset;
 	else
@@ -159,6 +167,23 @@ static ssize_t temp1_input_show(struct device *dev,
 	return sprintf(buf, "%u\n", temp);
 }
 
+static ssize_t temp2_input_show(struct device *dev,
+				struct device_attribute *devattr, char *buf)
+{
+	struct k10temp_data *data = dev_get_drvdata(dev);
+	unsigned int temp = get_raw_temp(data);
+
+	return sprintf(buf, "%u\n", temp);
+}
+
+static ssize_t temp_label_show(struct device *dev,
+			       struct device_attribute *devattr, char *buf)
+{
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+
+	return sprintf(buf, "%s\n", attr->index ? "Tctl" : "Tdie");
+}
+
 static ssize_t temp1_max_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
@@ -187,16 +212,23 @@ static DEVICE_ATTR_RO(temp1_max);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_temp_crit, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, show_temp_crit, NULL, 1);
 
+static SENSOR_DEVICE_ATTR(temp1_label, 0444, temp_label_show, NULL, 0);
+static DEVICE_ATTR_RO(temp2_input);
+static SENSOR_DEVICE_ATTR(temp2_label, 0444, temp_label_show, NULL, 1);
+
 static umode_t k10temp_is_visible(struct kobject *kobj,
 				  struct attribute *attr, int index)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct k10temp_data *data = dev_get_drvdata(dev);
 	struct pci_dev *pdev = data->pdev;
+	u32 reg;
 
-	if (index >= 2) {
-		u32 reg;
-
+	switch (index) {
+	case 0 ... 1:	/* temp1_input, temp1_max */
+	default:
+		break;
+	case 2 ... 3:	/* temp1_crit, temp1_crit_hyst */
 		if (!data->read_htcreg)
 			return 0;
 
@@ -208,6 +240,11 @@ static umode_t k10temp_is_visible(struct kobject *kobj,
 		data->read_htcreg(data->pdev, &reg);
 		if (!(reg & HTC_ENABLE))
 			return 0;
+		break;
+	case 4 ... 6:	/* temp1_label, temp2_input, temp2_label */
+		if (!data->show_tdie)
+			return 0;
+		break;
 	}
 	return attr->mode;
 }
@@ -217,6 +254,9 @@ static struct attribute *k10temp_attrs[] = {
 	&dev_attr_temp1_max.attr,
 	&sensor_dev_attr_temp1_crit.dev_attr.attr,
 	&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
+	&sensor_dev_attr_temp1_label.dev_attr.attr,
+	&dev_attr_temp2_input.attr,
+	&sensor_dev_attr_temp2_label.dev_attr.attr,
 	NULL
 };
 
@@ -292,6 +332,7 @@ static int k10temp_probe(struct pci_dev *pdev,
 	} else if (boot_cpu_data.x86 == 0x17) {
 		data->temp_adjust_mask = 0x80000;
 		data->read_tempreg = read_tempreg_nb_f17;
+		data->show_tdie = true;
 	} else {
 		data->read_htcreg = read_htcreg_pci;
 		data->read_tempreg = read_tempreg_pci;
-- 
2.7.4

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [1/2] hwmon: (k10temp) Fix reading critical temperature register
  2018-04-27  3:13 [PATCH 1/2] hwmon: (k10temp) Fix reading critical temperature register Guenter Roeck
  2018-04-27  3:13 ` [PATCH 2/2] hwmon: (k10temp) Display both Tctl and Tdie Guenter Roeck
@ 2018-04-29 14:59 ` Guenter Roeck
  1 sibling, 0 replies; 5+ messages in thread
From: Guenter Roeck @ 2018-04-29 14:59 UTC (permalink / raw)
  To: Clemens Ladisch; +Cc: Jean Delvare, linux-hwmon, linux-kernel, Brian Woods

On Thu, Apr 26, 2018 at 08:13:08PM -0700, Guenter Roeck wrote:
> The HTC (Hardware Temperature Control) register has moved
> for recent chips.
> 
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> ---
> Compile tested only.

No idea what I did here. I had the same patch in the standalone driver and
it was reported working ... but this version is missing the code to actually
call the new function from show_temp_crit(). I'll send a v2.

Guenter

> 
>  drivers/hwmon/k10temp.c | 37 +++++++++++++++++++++++++++++--------
>  1 file changed, 29 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
> index d2cc55e21374..b06bb1f90853 100644
> --- a/drivers/hwmon/k10temp.c
> +++ b/drivers/hwmon/k10temp.c
> @@ -63,10 +63,12 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
>  #define  NB_CAP_HTC			0x00000400
>  
>  /*
> - * For F15h M60h, functionality of REG_REPORTED_TEMPERATURE
> - * has been moved to D0F0xBC_xD820_0CA4 [Reported Temperature
> - * Control]
> + * For F15h M60h and M70h, REG_HARDWARE_THERMAL_CONTROL
> + * and REG_REPORTED_TEMPERATURE have been moved to
> + * D0F0xBC_xD820_0C64 [Hardware Temperature Control]
> + * D0F0xBC_xD820_0CA4 [Reported Temperature Control]
>   */
> +#define F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET	0xd8200c64
>  #define F15H_M60H_REPORTED_TEMP_CTRL_OFFSET	0xd8200ca4
>  
>  /* F17h M01h Access througn SMN */
> @@ -74,6 +76,7 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
>  
>  struct k10temp_data {
>  	struct pci_dev *pdev;
> +	void (*read_htcreg)(struct pci_dev *pdev, u32 *regval);
>  	void (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
>  	int temp_offset;
>  	u32 temp_adjust_mask;
> @@ -98,6 +101,11 @@ static const struct tctl_offset tctl_offset_table[] = {
>  	{ 0x17, "AMD Ryzen Threadripper 1910", 10000 },
>  };
>  
> +static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval)
> +{
> +	pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval);
> +}
> +
>  static void read_tempreg_pci(struct pci_dev *pdev, u32 *regval)
>  {
>  	pci_read_config_dword(pdev, REG_REPORTED_TEMPERATURE, regval);
> @@ -114,6 +122,12 @@ static void amd_nb_index_read(struct pci_dev *pdev, unsigned int devfn,
>  	mutex_unlock(&nb_smu_ind_mutex);
>  }
>  
> +static void read_htcreg_nb_f15(struct pci_dev *pdev, u32 *regval)
> +{
> +	amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
> +			  F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET, regval);
> +}
> +
>  static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
>  {
>  	amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
> @@ -181,13 +195,18 @@ static umode_t k10temp_is_visible(struct kobject *kobj,
>  	struct pci_dev *pdev = data->pdev;
>  
>  	if (index >= 2) {
> -		u32 reg_caps, reg_htc;
> +		u32 reg;
> +
> +		if (!data->read_htcreg)
> +			return 0;
>  
>  		pci_read_config_dword(pdev, REG_NORTHBRIDGE_CAPABILITIES,
> -				      &reg_caps);
> -		pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL,
> -				      &reg_htc);
> -		if (!(reg_caps & NB_CAP_HTC) || !(reg_htc & HTC_ENABLE))
> +				      &reg);
> +		if (!(reg & NB_CAP_HTC))
> +			return 0;
> +
> +		data->read_htcreg(data->pdev, &reg);
> +		if (!(reg & HTC_ENABLE))
>  			return 0;
>  	}
>  	return attr->mode;
> @@ -268,11 +287,13 @@ static int k10temp_probe(struct pci_dev *pdev,
>  
>  	if (boot_cpu_data.x86 == 0x15 && (boot_cpu_data.x86_model == 0x60 ||
>  					  boot_cpu_data.x86_model == 0x70)) {
> +		data->read_htcreg = read_htcreg_nb_f15;
>  		data->read_tempreg = read_tempreg_nb_f15;
>  	} else if (boot_cpu_data.x86 == 0x17) {
>  		data->temp_adjust_mask = 0x80000;
>  		data->read_tempreg = read_tempreg_nb_f17;
>  	} else {
> +		data->read_htcreg = read_htcreg_pci;
>  		data->read_tempreg = read_tempreg_pci;
>  	}
>  

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] hwmon: (k10temp) Display both Tctl and Tdie
  2018-04-27  3:13 ` [PATCH 2/2] hwmon: (k10temp) Display both Tctl and Tdie Guenter Roeck
@ 2018-04-29 16:13   ` Gabriel C
  2018-04-29 16:27     ` Guenter Roeck
  0 siblings, 1 reply; 5+ messages in thread
From: Gabriel C @ 2018-04-29 16:13 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Clemens Ladisch, Jean Delvare, linux-hwmon, LKML, Brian Woods

2018-04-27 5:13 GMT+02:00 Guenter Roeck <linux@roeck-us.net>:
> On some AMD CPUs, there is a different between the die temperature
> (Tdie) and the reported temperature (Tctl). Tdie is the real measured
> temperature, and Tctl is used for fan control. Lets report both for
> affected CPUs.
>
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> ---
>  drivers/hwmon/k10temp.c | 55 ++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 48 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
> index b06bb1f90853..1135b8f1ad0f 100644
> --- a/drivers/hwmon/k10temp.c
> +++ b/drivers/hwmon/k10temp.c
> @@ -80,6 +80,7 @@ struct k10temp_data {
>         void (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
>         int temp_offset;
>         u32 temp_adjust_mask;
> +       bool show_tdie;
>  };
>
>  struct tctl_offset {
> @@ -140,17 +141,24 @@ static void read_tempreg_nb_f17(struct pci_dev *pdev, u32 *regval)
>                           F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval);
>  }
>
> -static ssize_t temp1_input_show(struct device *dev,
> -                               struct device_attribute *attr, char *buf)
> +unsigned int get_raw_temp(struct k10temp_data *data)
>  {
> -       struct k10temp_data *data = dev_get_drvdata(dev);
> -       u32 regval;
>         unsigned int temp;
> +       u32 regval;
>
>         data->read_tempreg(data->pdev, &regval);
>         temp = (regval >> 21) * 125;
>         if (regval & data->temp_adjust_mask)
>                 temp -= 49000;
> +       return temp;
> +}
> +
> +static ssize_t temp1_input_show(struct device *dev,
> +                               struct device_attribute *attr, char *buf)
> +{
> +       struct k10temp_data *data = dev_get_drvdata(dev);
> +       unsigned int temp = get_raw_temp(data);
> +
>         if (temp > data->temp_offset)
>                 temp -= data->temp_offset;
>         else
> @@ -159,6 +167,23 @@ static ssize_t temp1_input_show(struct device *dev,
>         return sprintf(buf, "%u\n", temp);
>  }
>
> +static ssize_t temp2_input_show(struct device *dev,
> +                               struct device_attribute *devattr, char *buf)
> +{
> +       struct k10temp_data *data = dev_get_drvdata(dev);
> +       unsigned int temp = get_raw_temp(data);
> +
> +       return sprintf(buf, "%u\n", temp);
> +}
> +
> +static ssize_t temp_label_show(struct device *dev,
> +                              struct device_attribute *devattr, char *buf)
> +{
> +       struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> +
> +       return sprintf(buf, "%s\n", attr->index ? "Tctl" : "Tdie");
> +}
> +
>  static ssize_t temp1_max_show(struct device *dev,
>                               struct device_attribute *attr, char *buf)
>  {
> @@ -187,16 +212,23 @@ static DEVICE_ATTR_RO(temp1_max);
>  static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_temp_crit, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, show_temp_crit, NULL, 1);
>
> +static SENSOR_DEVICE_ATTR(temp1_label, 0444, temp_label_show, NULL, 0);
> +static DEVICE_ATTR_RO(temp2_input);
> +static SENSOR_DEVICE_ATTR(temp2_label, 0444, temp_label_show, NULL, 1);
> +
>  static umode_t k10temp_is_visible(struct kobject *kobj,
>                                   struct attribute *attr, int index)
>  {
>         struct device *dev = container_of(kobj, struct device, kobj);
>         struct k10temp_data *data = dev_get_drvdata(dev);
>         struct pci_dev *pdev = data->pdev;
> +       u32 reg;
>
> -       if (index >= 2) {
> -               u32 reg;
> -
> +       switch (index) {
> +       case 0 ... 1:   /* temp1_input, temp1_max */
> +       default:
> +               break;
> +       case 2 ... 3:   /* temp1_crit, temp1_crit_hyst */
>                 if (!data->read_htcreg)
>                         return 0;
>
> @@ -208,6 +240,11 @@ static umode_t k10temp_is_visible(struct kobject *kobj,
>                 data->read_htcreg(data->pdev, &reg);
>                 if (!(reg & HTC_ENABLE))
>                         return 0;
> +               break;
> +       case 4 ... 6:   /* temp1_label, temp2_input, temp2_label */
> +               if (!data->show_tdie)
> +                       return 0;
> +               break;
>         }
>         return attr->mode;
>  }
> @@ -217,6 +254,9 @@ static struct attribute *k10temp_attrs[] = {
>         &dev_attr_temp1_max.attr,
>         &sensor_dev_attr_temp1_crit.dev_attr.attr,
>         &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
> +       &sensor_dev_attr_temp1_label.dev_attr.attr,
> +       &dev_attr_temp2_input.attr,
> +       &sensor_dev_attr_temp2_label.dev_attr.attr,
>         NULL
>  };
>
> @@ -292,6 +332,7 @@ static int k10temp_probe(struct pci_dev *pdev,
>         } else if (boot_cpu_data.x86 == 0x17) {
>                 data->temp_adjust_mask = 0x80000;
>                 data->read_tempreg = read_tempreg_nb_f17;
> +               data->show_tdie = true;
>         } else {
>                 data->read_htcreg = read_htcreg_pci;
>                 data->read_tempreg = read_tempreg_pci;
> --
> 2.7.4
>

On my EPYC box Tdie and Tctl seems to have the same value.
However the code is working fine and both values are displayed now.

Tested-by: Gabriel Craciunescu <nix.or.die@gmail.com>

Regards

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] hwmon: (k10temp) Display both Tctl and Tdie
  2018-04-29 16:13   ` Gabriel C
@ 2018-04-29 16:27     ` Guenter Roeck
  0 siblings, 0 replies; 5+ messages in thread
From: Guenter Roeck @ 2018-04-29 16:27 UTC (permalink / raw)
  To: Gabriel C; +Cc: Clemens Ladisch, Jean Delvare, linux-hwmon, LKML, Brian Woods

On 04/29/2018 09:13 AM, Gabriel C wrote:
> 2018-04-27 5:13 GMT+02:00 Guenter Roeck <linux@roeck-us.net>:
>> On some AMD CPUs, there is a different between the die temperature
>> (Tdie) and the reported temperature (Tctl). Tdie is the real measured
>> temperature, and Tctl is used for fan control. Lets report both for
>> affected CPUs.
>>
>> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
>> ---
>>   drivers/hwmon/k10temp.c | 55 ++++++++++++++++++++++++++++++++++++++++++-------
>>   1 file changed, 48 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
>> index b06bb1f90853..1135b8f1ad0f 100644
>> --- a/drivers/hwmon/k10temp.c
>> +++ b/drivers/hwmon/k10temp.c
>> @@ -80,6 +80,7 @@ struct k10temp_data {
>>          void (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
>>          int temp_offset;
>>          u32 temp_adjust_mask;
>> +       bool show_tdie;
>>   };
>>
>>   struct tctl_offset {
>> @@ -140,17 +141,24 @@ static void read_tempreg_nb_f17(struct pci_dev *pdev, u32 *regval)
>>                            F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval);
>>   }
>>
>> -static ssize_t temp1_input_show(struct device *dev,
>> -                               struct device_attribute *attr, char *buf)
>> +unsigned int get_raw_temp(struct k10temp_data *data)
>>   {
>> -       struct k10temp_data *data = dev_get_drvdata(dev);
>> -       u32 regval;
>>          unsigned int temp;
>> +       u32 regval;
>>
>>          data->read_tempreg(data->pdev, &regval);
>>          temp = (regval >> 21) * 125;
>>          if (regval & data->temp_adjust_mask)
>>                  temp -= 49000;
>> +       return temp;
>> +}
>> +
>> +static ssize_t temp1_input_show(struct device *dev,
>> +                               struct device_attribute *attr, char *buf)
>> +{
>> +       struct k10temp_data *data = dev_get_drvdata(dev);
>> +       unsigned int temp = get_raw_temp(data);
>> +
>>          if (temp > data->temp_offset)
>>                  temp -= data->temp_offset;
>>          else
>> @@ -159,6 +167,23 @@ static ssize_t temp1_input_show(struct device *dev,
>>          return sprintf(buf, "%u\n", temp);
>>   }
>>
>> +static ssize_t temp2_input_show(struct device *dev,
>> +                               struct device_attribute *devattr, char *buf)
>> +{
>> +       struct k10temp_data *data = dev_get_drvdata(dev);
>> +       unsigned int temp = get_raw_temp(data);
>> +
>> +       return sprintf(buf, "%u\n", temp);
>> +}
>> +
>> +static ssize_t temp_label_show(struct device *dev,
>> +                              struct device_attribute *devattr, char *buf)
>> +{
>> +       struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
>> +
>> +       return sprintf(buf, "%s\n", attr->index ? "Tctl" : "Tdie");
>> +}
>> +
>>   static ssize_t temp1_max_show(struct device *dev,
>>                                struct device_attribute *attr, char *buf)
>>   {
>> @@ -187,16 +212,23 @@ static DEVICE_ATTR_RO(temp1_max);
>>   static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_temp_crit, NULL, 0);
>>   static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, show_temp_crit, NULL, 1);
>>
>> +static SENSOR_DEVICE_ATTR(temp1_label, 0444, temp_label_show, NULL, 0);
>> +static DEVICE_ATTR_RO(temp2_input);
>> +static SENSOR_DEVICE_ATTR(temp2_label, 0444, temp_label_show, NULL, 1);
>> +
>>   static umode_t k10temp_is_visible(struct kobject *kobj,
>>                                    struct attribute *attr, int index)
>>   {
>>          struct device *dev = container_of(kobj, struct device, kobj);
>>          struct k10temp_data *data = dev_get_drvdata(dev);
>>          struct pci_dev *pdev = data->pdev;
>> +       u32 reg;
>>
>> -       if (index >= 2) {
>> -               u32 reg;
>> -
>> +       switch (index) {
>> +       case 0 ... 1:   /* temp1_input, temp1_max */
>> +       default:
>> +               break;
>> +       case 2 ... 3:   /* temp1_crit, temp1_crit_hyst */
>>                  if (!data->read_htcreg)
>>                          return 0;
>>
>> @@ -208,6 +240,11 @@ static umode_t k10temp_is_visible(struct kobject *kobj,
>>                  data->read_htcreg(data->pdev, &reg);
>>                  if (!(reg & HTC_ENABLE))
>>                          return 0;
>> +               break;
>> +       case 4 ... 6:   /* temp1_label, temp2_input, temp2_label */
>> +               if (!data->show_tdie)
>> +                       return 0;
>> +               break;
>>          }
>>          return attr->mode;
>>   }
>> @@ -217,6 +254,9 @@ static struct attribute *k10temp_attrs[] = {
>>          &dev_attr_temp1_max.attr,
>>          &sensor_dev_attr_temp1_crit.dev_attr.attr,
>>          &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
>> +       &sensor_dev_attr_temp1_label.dev_attr.attr,
>> +       &dev_attr_temp2_input.attr,
>> +       &sensor_dev_attr_temp2_label.dev_attr.attr,
>>          NULL
>>   };
>>
>> @@ -292,6 +332,7 @@ static int k10temp_probe(struct pci_dev *pdev,
>>          } else if (boot_cpu_data.x86 == 0x17) {
>>                  data->temp_adjust_mask = 0x80000;
>>                  data->read_tempreg = read_tempreg_nb_f17;
>> +               data->show_tdie = true;
>>          } else {
>>                  data->read_htcreg = read_htcreg_pci;
>>                  data->read_tempreg = read_tempreg_pci;
>> --
>> 2.7.4
>>
> 
> On my EPYC box Tdie and Tctl seems to have the same value.
> However the code is working fine and both values are displayed now.
> 

Yes, that is as expected; EPYC CPUs don't have a temperature offset
between Tctl and Tdie (at least not as far as I know). Tctl is the value
reported by the chip; if I recall correctly, it is in the low 20s on
your system if it is idle. If there was an offset, Tdie would be reported
as an even lower temperature, ie in the 10s, which would not make sense.

> Tested-by: Gabriel Craciunescu <nix.or.die@gmail.com>
> 
Thanks a lot for testing!

Guenter

> Regards
> --
> To unsubscribe from this list: send the line "unsubscribe linux-hwmon" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-04-29 16:27 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-27  3:13 [PATCH 1/2] hwmon: (k10temp) Fix reading critical temperature register Guenter Roeck
2018-04-27  3:13 ` [PATCH 2/2] hwmon: (k10temp) Display both Tctl and Tdie Guenter Roeck
2018-04-29 16:13   ` Gabriel C
2018-04-29 16:27     ` Guenter Roeck
2018-04-29 14:59 ` [1/2] hwmon: (k10temp) Fix reading critical temperature register Guenter Roeck

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).