LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH 0/4] A few autonuma cleanups
@ 2021-10-04 10:57 Bharata B Rao
  2021-10-04 10:57 ` [PATCH 1/4] sched/numa: Replace hard-coded number by a define in numa_task_group() Bharata B Rao
                   ` (3 more replies)
  0 siblings, 4 replies; 17+ messages in thread
From: Bharata B Rao @ 2021-10-04 10:57 UTC (permalink / raw)
  To: linux-kernel; +Cc: mingo, peterz, mgorman, riel, Bharata B Rao

Hi,

Here are a few cleanups to the autonuma code.

First three patches shouldn't result in any functionality change.
Patches 1 and 2 are fairly simple and patch 3 is a trivial comment
fix. I am not too sure about patch 4 and hence it is RFC.

Bharata B Rao (4):
  sched/numa: Replace hard-coded number by a define in numa_task_group()
  sched/numa: Remove the redundant member numa_group::fault_cpus
  sched/numa: Fix a few comments
  sched/numa: Don't update mm->numa_next_scan from fault path

 kernel/sched/fair.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1/4] sched/numa: Replace hard-coded number by a define in numa_task_group()
  2021-10-04 10:57 [PATCH 0/4] A few autonuma cleanups Bharata B Rao
@ 2021-10-04 10:57 ` Bharata B Rao
  2021-10-05  8:18   ` Mel Gorman
                     ` (2 more replies)
  2021-10-04 10:57 ` [PATCH 2/4] sched/numa: Remove the redundant member numa_group::fault_cpus Bharata B Rao
                   ` (2 subsequent siblings)
  3 siblings, 3 replies; 17+ messages in thread
From: Bharata B Rao @ 2021-10-04 10:57 UTC (permalink / raw)
  To: linux-kernel; +Cc: mingo, peterz, mgorman, riel, Bharata B Rao

While allocating group fault stats, task_numa_group()
is using a hard coded number 4. Replace this by
NR_NUMA_HINT_FAULT_STATS.

No functionality change in this commit.

Signed-off-by: Bharata B Rao <bharata@amd.com>
---
 kernel/sched/fair.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ff69f245b939..e8a696fd713d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2481,7 +2481,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
 
 	if (unlikely(!deref_curr_numa_group(p))) {
 		unsigned int size = sizeof(struct numa_group) +
-				    4*nr_node_ids*sizeof(unsigned long);
+				    NR_NUMA_HINT_FAULT_STATS *
+				    nr_node_ids * sizeof(unsigned long);
 
 		grp = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
 		if (!grp)
-- 
2.25.1


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 2/4] sched/numa: Remove the redundant member numa_group::fault_cpus
  2021-10-04 10:57 [PATCH 0/4] A few autonuma cleanups Bharata B Rao
  2021-10-04 10:57 ` [PATCH 1/4] sched/numa: Replace hard-coded number by a define in numa_task_group() Bharata B Rao
@ 2021-10-04 10:57 ` Bharata B Rao
  2021-10-05  8:21   ` Mel Gorman
                     ` (2 more replies)
  2021-10-04 10:57 ` [PATCH 3/4] sched/numa: Fix a few comments Bharata B Rao
  2021-10-04 10:57 ` [RFC PATCH 4/4] sched/numa: Don't update mm->numa_next_scan from fault path Bharata B Rao
  3 siblings, 3 replies; 17+ messages in thread
From: Bharata B Rao @ 2021-10-04 10:57 UTC (permalink / raw)
  To: linux-kernel; +Cc: mingo, peterz, mgorman, riel, Bharata B Rao

numa_group::fault_cpus is actually a pointer to the region
in numa_group::faults[] where NUMA_CPU stats are located.

Remove this redundant member and use numa_group::faults[NUMA_CPU]
directly like it is done for similar per-process numa fault stats.

There is no functionality change due to this commit.

Signed-off-by: Bharata B Rao <bharata@amd.com>
---
 kernel/sched/fair.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e8a696fd713d..3471c8ef0659 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1081,11 +1081,12 @@ struct numa_group {
 	unsigned long total_faults;
 	unsigned long max_faults_cpu;
 	/*
+	 * faults[] array is split into two regions: faults_mem and faults_cpu.
+	 *
 	 * Faults_cpu is used to decide whether memory should move
 	 * towards the CPU. As a consequence, these stats are weighted
 	 * more by CPU use than by memory faults.
 	 */
-	unsigned long *faults_cpu;
 	unsigned long faults[];
 };
 
@@ -1259,8 +1260,8 @@ static inline unsigned long group_faults(struct task_struct *p, int nid)
 
 static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
 {
-	return group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 0)] +
-		group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)];
+	return group->faults[task_faults_idx(NUMA_CPU, nid, 0)] +
+		group->faults[task_faults_idx(NUMA_CPU, nid, 1)];
 }
 
 static inline unsigned long group_faults_priv(struct numa_group *ng)
@@ -2427,7 +2428,7 @@ static void task_numa_placement(struct task_struct *p)
 				 * is at the beginning of the numa_faults array.
 				 */
 				ng->faults[mem_idx] += diff;
-				ng->faults_cpu[mem_idx] += f_diff;
+				ng->faults[cpu_idx] += f_diff;
 				ng->total_faults += diff;
 				group_faults += ng->faults[mem_idx];
 			}
@@ -2493,9 +2494,6 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
 		grp->max_faults_cpu = 0;
 		spin_lock_init(&grp->lock);
 		grp->gid = p->pid;
-		/* Second half of the array tracks nids where faults happen */
-		grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES *
-						nr_node_ids;
 
 		for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
 			grp->faults[i] = p->numa_faults[i];
-- 
2.25.1


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 3/4] sched/numa: Fix a few comments
  2021-10-04 10:57 [PATCH 0/4] A few autonuma cleanups Bharata B Rao
  2021-10-04 10:57 ` [PATCH 1/4] sched/numa: Replace hard-coded number by a define in numa_task_group() Bharata B Rao
  2021-10-04 10:57 ` [PATCH 2/4] sched/numa: Remove the redundant member numa_group::fault_cpus Bharata B Rao
@ 2021-10-04 10:57 ` Bharata B Rao
  2021-10-05  8:22   ` Mel Gorman
                     ` (2 more replies)
  2021-10-04 10:57 ` [RFC PATCH 4/4] sched/numa: Don't update mm->numa_next_scan from fault path Bharata B Rao
  3 siblings, 3 replies; 17+ messages in thread
From: Bharata B Rao @ 2021-10-04 10:57 UTC (permalink / raw)
  To: linux-kernel; +Cc: mingo, peterz, mgorman, riel, Bharata B Rao

Fix a few comments to help understand them better.

Signed-off-by: Bharata B Rao <bharata@amd.com>
---
 kernel/sched/fair.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3471c8ef0659..ceadb43e927b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2117,7 +2117,7 @@ static void numa_migrate_preferred(struct task_struct *p)
 }
 
 /*
- * Find out how many nodes on the workload is actively running on. Do this by
+ * Find out how many nodes the workload is actively running on. Do this by
  * tracking the nodes from which NUMA hinting faults are triggered. This can
  * be different from the set of nodes where the workload's memory is currently
  * located.
@@ -2171,7 +2171,7 @@ static void update_task_scan_period(struct task_struct *p,
 
 	/*
 	 * If there were no record hinting faults then either the task is
-	 * completely idle or all activity is areas that are not of interest
+	 * completely idle or all activity is in areas that are not of interest
 	 * to automatic numa balancing. Related to that, if there were failed
 	 * migration then it implies we are migrating too quickly or the local
 	 * node is overloaded. In either case, scan slower
-- 
2.25.1


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [RFC PATCH 4/4] sched/numa: Don't update mm->numa_next_scan from fault path
  2021-10-04 10:57 [PATCH 0/4] A few autonuma cleanups Bharata B Rao
                   ` (2 preceding siblings ...)
  2021-10-04 10:57 ` [PATCH 3/4] sched/numa: Fix a few comments Bharata B Rao
@ 2021-10-04 10:57 ` Bharata B Rao
  2021-10-05  8:23   ` Mel Gorman
  3 siblings, 1 reply; 17+ messages in thread
From: Bharata B Rao @ 2021-10-04 10:57 UTC (permalink / raw)
  To: linux-kernel; +Cc: mingo, peterz, mgorman, riel, Bharata B Rao

p->numa_scan_period is typically scaled up or down from
the fault path and mm->numa_next_scan is updated during
scanning from the task_work context using cmpxchg.

However there is one case where the scan period is increased
in the fault path, but mm->numa_next_scan

 - is immediately updated and
 - updated without using cmpxchg

Both of the above don't seem intended and hence remove
the update of mm->numa_next_scan from the fault path.
The update should happen from task_work context subsequently.

Signed-off-by: Bharata B Rao <bharata@amd.com>
---
 kernel/sched/fair.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ceadb43e927b..3e420e73e265 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2180,9 +2180,6 @@ static void update_task_scan_period(struct task_struct *p,
 		p->numa_scan_period = min(p->numa_scan_period_max,
 			p->numa_scan_period << 1);
 
-		p->mm->numa_next_scan = jiffies +
-			msecs_to_jiffies(p->numa_scan_period);
-
 		return;
 	}
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/4] sched/numa: Replace hard-coded number by a define in numa_task_group()
  2021-10-04 10:57 ` [PATCH 1/4] sched/numa: Replace hard-coded number by a define in numa_task_group() Bharata B Rao
@ 2021-10-05  8:18   ` Mel Gorman
  2021-10-09 10:07   ` [tip: sched/core] " tip-bot2 for Bharata B Rao
  2021-10-14 11:16   ` tip-bot2 for Bharata B Rao
  2 siblings, 0 replies; 17+ messages in thread
From: Mel Gorman @ 2021-10-05  8:18 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: linux-kernel, mingo, peterz, riel

On Mon, Oct 04, 2021 at 04:27:03PM +0530, Bharata B Rao wrote:
> While allocating group fault stats, task_numa_group()
> is using a hard coded number 4. Replace this by
> NR_NUMA_HINT_FAULT_STATS.
> 
> No functionality change in this commit.
> 
> Signed-off-by: Bharata B Rao <bharata@amd.com>

Acked-by: Mel Gorman <mgorman@suse.de>

-- 
Mel Gorman
SUSE Labs

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/4] sched/numa: Remove the redundant member numa_group::fault_cpus
  2021-10-04 10:57 ` [PATCH 2/4] sched/numa: Remove the redundant member numa_group::fault_cpus Bharata B Rao
@ 2021-10-05  8:21   ` Mel Gorman
  2021-10-09 10:07   ` [tip: sched/core] " tip-bot2 for Bharata B Rao
  2021-10-14 11:16   ` tip-bot2 for Bharata B Rao
  2 siblings, 0 replies; 17+ messages in thread
From: Mel Gorman @ 2021-10-05  8:21 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: linux-kernel, mingo, peterz, riel

On Mon, Oct 04, 2021 at 04:27:04PM +0530, Bharata B Rao wrote:
> numa_group::fault_cpus is actually a pointer to the region
> in numa_group::faults[] where NUMA_CPU stats are located.
> 
> Remove this redundant member and use numa_group::faults[NUMA_CPU]
> directly like it is done for similar per-process numa fault stats.
> 
> There is no functionality change due to this commit.
> 
> Signed-off-by: Bharata B Rao <bharata@amd.com>

Acked-by: Mel Gorman <mgorman@suse.de>

-- 
Mel Gorman
SUSE Labs

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/4] sched/numa: Fix a few comments
  2021-10-04 10:57 ` [PATCH 3/4] sched/numa: Fix a few comments Bharata B Rao
@ 2021-10-05  8:22   ` Mel Gorman
  2021-10-09 10:07   ` [tip: sched/core] " tip-bot2 for Bharata B Rao
  2021-10-14 11:16   ` tip-bot2 for Bharata B Rao
  2 siblings, 0 replies; 17+ messages in thread
From: Mel Gorman @ 2021-10-05  8:22 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: linux-kernel, mingo, peterz, riel

On Mon, Oct 04, 2021 at 04:27:05PM +0530, Bharata B Rao wrote:
> Fix a few comments to help understand them better.
> 
> Signed-off-by: Bharata B Rao <bharata@amd.com>

Acked-by: Mel Gorman <mgorman@suse.de>

-- 
Mel Gorman
SUSE Labs

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC PATCH 4/4] sched/numa: Don't update mm->numa_next_scan from fault path
  2021-10-04 10:57 ` [RFC PATCH 4/4] sched/numa: Don't update mm->numa_next_scan from fault path Bharata B Rao
@ 2021-10-05  8:23   ` Mel Gorman
  2021-10-05  9:10     ` Bharata B Rao
  0 siblings, 1 reply; 17+ messages in thread
From: Mel Gorman @ 2021-10-05  8:23 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: linux-kernel, mingo, peterz, riel

On Mon, Oct 04, 2021 at 04:27:06PM +0530, Bharata B Rao wrote:
> p->numa_scan_period is typically scaled up or down from
> the fault path and mm->numa_next_scan is updated during
> scanning from the task_work context using cmpxchg.
> 
> However there is one case where the scan period is increased
> in the fault path, but mm->numa_next_scan
> 
>  - is immediately updated and
>  - updated without using cmpxchg
> 
> Both of the above don't seem intended and hence remove
> the updation of mm->numa_next_scan from the fault path
> Updation should happen from task_work context subsequently.
> 
> Signed-off-by: Bharata B Rao <bharata@amd.com>

I believe the update was intended because it aims to reduce scanning
when the task is either completely idle or activity is in memory ranges
that are not influenced by numab. What is the user-visible impact you
observe?

My expectation is that in some cases this will increase the number of
PTE updates and migrations. It may even be a performance gain for some
workloads if it increases locality but in cases where locality is poor
(e.g. heavily shared regions or cross-node migrations), there will be a
loss due to increased numab activity.

Updating via cmpxchg would be ok to avoid potential collisions between
threads updating a shared mm.

-- 
Mel Gorman
SUSE Labs

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC PATCH 4/4] sched/numa: Don't update mm->numa_next_scan from fault path
  2021-10-05  8:23   ` Mel Gorman
@ 2021-10-05  9:10     ` Bharata B Rao
  2021-10-07 10:25       ` Mel Gorman
  0 siblings, 1 reply; 17+ messages in thread
From: Bharata B Rao @ 2021-10-05  9:10 UTC (permalink / raw)
  To: Mel Gorman; +Cc: linux-kernel, mingo, peterz


On 10/5/2021 1:53 PM, Mel Gorman wrote:
> On Mon, Oct 04, 2021 at 04:27:06PM +0530, Bharata B Rao wrote:
>> p->numa_scan_period is typically scaled up or down from
>> the fault path and mm->numa_next_scan is updated during
>> scanning from the task_work context using cmpxchg.
>>
>> However there is one case where the scan period is increased
>> in the fault path, but mm->numa_next_scan
>>
>>  - is immediately updated and
>>  - updated without using cmpxchg
>>
>> Both of the above don't seem intended and hence remove
>> the updation of mm->numa_next_scan from the fault path
>> Updation should happen from task_work context subsequently.
>>
>> Signed-off-by: Bharata B Rao <bharata@amd.com>
> 
> I believe the update was intended because it aims to reduce scanning
> when the task is either completely idle or activity is in memory ranges
> that are not influenced by numab. What is the user-visible impact you
> observe?

I haven't measured, it just appeared unintended when glancing at
the code, but now you have clarified it.

> 
> My expectation is that in some cases this will increase the number of
> PTE updates and migrations. It may even be a performance gain for some
> workloads if it increases locality but in cases where locality is poor
> (e.g. heavily shared regions or cross-node migrations), there will be a
> loss due to increased numab activity.

Thanks, I will check if I can measure and verify the above.

> 
> Updating via cmpxchg would be ok to avoid potential collisions between
> threads updating a shared mm.

Ok, maybe I could just resend with changing the scan period update
to use cmpxchg.

I also notice that in this case of scan period update, we just return
without resetting the p->numa_faults_locality[]. Do you think
skipping the reset doesn't matter in this case?

Regards,
Bharata.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC PATCH 4/4] sched/numa: Don't update mm->numa_next_scan from fault path
  2021-10-05  9:10     ` Bharata B Rao
@ 2021-10-07 10:25       ` Mel Gorman
  0 siblings, 0 replies; 17+ messages in thread
From: Mel Gorman @ 2021-10-07 10:25 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: linux-kernel, mingo, peterz

On Tue, Oct 05, 2021 at 02:40:15PM +0530, Bharata B Rao wrote:
> > 
> > Updating via cmpxchg would be ok to avoid potential collisions between
> > threads updating a shared mm.
> 
> Ok, may be I could just resend with changing the scan period update
> to use cmpxchg.
> 
> I also notice that in this case of scan period update, we just return
> without resetting the p->numa_faults_locality[]. Do you think if
> skipping the reset doesn't matter in this case?
> 

If there is no fault activity or migrations are failing, there is no
advantage to clearing numa_faults_locality[]. The information there
is still useful even if the scan period is updated.

-- 
Mel Gorman
SUSE Labs

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [tip: sched/core] sched/numa: Fix a few comments
  2021-10-04 10:57 ` [PATCH 3/4] sched/numa: Fix a few comments Bharata B Rao
  2021-10-05  8:22   ` Mel Gorman
@ 2021-10-09 10:07   ` tip-bot2 for Bharata B Rao
  2021-10-14 11:16   ` tip-bot2 for Bharata B Rao
  2 siblings, 0 replies; 17+ messages in thread
From: tip-bot2 for Bharata B Rao @ 2021-10-09 10:07 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Bharata B Rao, Peter Zijlstra (Intel), Mel Gorman, x86, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     2294d6f5131b6b226d28828bd60e6fbc69962e84
Gitweb:        https://git.kernel.org/tip/2294d6f5131b6b226d28828bd60e6fbc69962e84
Author:        Bharata B Rao <bharata@amd.com>
AuthorDate:    Mon, 04 Oct 2021 16:27:05 +05:30
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 07 Oct 2021 13:51:16 +02:00

sched/numa: Fix a few comments

Fix a few comments to help understand them better.

Signed-off-by: Bharata B Rao <bharata@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: https://lkml.kernel.org/r/20211004105706.3669-4-bharata@amd.com
---
 kernel/sched/fair.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cfbd5ef..87db481 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2074,7 +2074,7 @@ static void numa_migrate_preferred(struct task_struct *p)
 }
 
 /*
- * Find out how many nodes on the workload is actively running on. Do this by
+ * Find out how many nodes the workload is actively running on. Do this by
  * tracking the nodes from which NUMA hinting faults are triggered. This can
  * be different from the set of nodes where the workload's memory is currently
  * located.
@@ -2128,7 +2128,7 @@ static void update_task_scan_period(struct task_struct *p,
 
 	/*
 	 * If there were no record hinting faults then either the task is
-	 * completely idle or all activity is areas that are not of interest
+	 * completely idle or all activity is in areas that are not of interest
 	 * to automatic numa balancing. Related to that, if there were failed
 	 * migration then it implies we are migrating too quickly or the local
 	 * node is overloaded. In either case, scan slower

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [tip: sched/core] sched/numa: Remove the redundant member numa_group::fault_cpus
  2021-10-04 10:57 ` [PATCH 2/4] sched/numa: Remove the redundant member numa_group::fault_cpus Bharata B Rao
  2021-10-05  8:21   ` Mel Gorman
@ 2021-10-09 10:07   ` tip-bot2 for Bharata B Rao
  2021-10-14 11:16   ` tip-bot2 for Bharata B Rao
  2 siblings, 0 replies; 17+ messages in thread
From: tip-bot2 for Bharata B Rao @ 2021-10-09 10:07 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Bharata B Rao, Peter Zijlstra (Intel), Mel Gorman, x86, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     00c034108a76e8282809b7f25fa6ff147a9c6893
Gitweb:        https://git.kernel.org/tip/00c034108a76e8282809b7f25fa6ff147a9c6893
Author:        Bharata B Rao <bharata@amd.com>
AuthorDate:    Mon, 04 Oct 2021 16:27:04 +05:30
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 07 Oct 2021 13:51:16 +02:00

sched/numa: Remove the redundant member numa_group::fault_cpus

numa_group::fault_cpus is actually a pointer to the region
in numa_group::faults[] where NUMA_CPU stats are located.

Remove this redundant member and use numa_group::faults[NUMA_CPU]
directly like it is done for similar per-process numa fault stats.

There is no functionality change due to this commit.

Signed-off-by: Bharata B Rao <bharata@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: https://lkml.kernel.org/r/20211004105706.3669-3-bharata@amd.com
---
 kernel/sched/fair.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fc0a0ed..cfbd5ef 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1038,11 +1038,12 @@ struct numa_group {
 	unsigned long total_faults;
 	unsigned long max_faults_cpu;
 	/*
+	 * faults[] array is split into two regions: faults_mem and faults_cpu.
+	 *
 	 * Faults_cpu is used to decide whether memory should move
 	 * towards the CPU. As a consequence, these stats are weighted
 	 * more by CPU use than by memory faults.
 	 */
-	unsigned long *faults_cpu;
 	unsigned long faults[];
 };
 
@@ -1216,8 +1217,8 @@ static inline unsigned long group_faults(struct task_struct *p, int nid)
 
 static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
 {
-	return group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 0)] +
-		group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)];
+	return group->faults[task_faults_idx(NUMA_CPU, nid, 0)] +
+		group->faults[task_faults_idx(NUMA_CPU, nid, 1)];
 }
 
 static inline unsigned long group_faults_priv(struct numa_group *ng)
@@ -2384,7 +2385,7 @@ static void task_numa_placement(struct task_struct *p)
 				 * is at the beginning of the numa_faults array.
 				 */
 				ng->faults[mem_idx] += diff;
-				ng->faults_cpu[mem_idx] += f_diff;
+				ng->faults[cpu_idx] += f_diff;
 				ng->total_faults += diff;
 				group_faults += ng->faults[mem_idx];
 			}
@@ -2450,9 +2451,6 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
 		grp->max_faults_cpu = 0;
 		spin_lock_init(&grp->lock);
 		grp->gid = p->pid;
-		/* Second half of the array tracks nids where faults happen */
-		grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES *
-						nr_node_ids;
 
 		for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
 			grp->faults[i] = p->numa_faults[i];

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [tip: sched/core] sched/numa: Replace hard-coded number by a define in numa_task_group()
  2021-10-04 10:57 ` [PATCH 1/4] sched/numa: Replace hard-coded number by a define in numa_task_group() Bharata B Rao
  2021-10-05  8:18   ` Mel Gorman
@ 2021-10-09 10:07   ` tip-bot2 for Bharata B Rao
  2021-10-14 11:16   ` tip-bot2 for Bharata B Rao
  2 siblings, 0 replies; 17+ messages in thread
From: tip-bot2 for Bharata B Rao @ 2021-10-09 10:07 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Bharata B Rao, Peter Zijlstra (Intel), Mel Gorman, x86, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     b6153093de41186e2c534ffffb8ce81b1666b110
Gitweb:        https://git.kernel.org/tip/b6153093de41186e2c534ffffb8ce81b1666b110
Author:        Bharata B Rao <bharata@amd.com>
AuthorDate:    Mon, 04 Oct 2021 16:27:03 +05:30
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 07 Oct 2021 13:51:16 +02:00

sched/numa: Replace hard-coded number by a define in numa_task_group()

While allocating group fault stats, task_numa_group()
is using a hard coded number 4. Replace this by
NR_NUMA_HINT_FAULT_STATS.

No functionality change in this commit.

Signed-off-by: Bharata B Rao <bharata@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: https://lkml.kernel.org/r/20211004105706.3669-2-bharata@amd.com
---
 kernel/sched/fair.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2468d1d..fc0a0ed 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2438,7 +2438,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
 
 	if (unlikely(!deref_curr_numa_group(p))) {
 		unsigned int size = sizeof(struct numa_group) +
-				    4*nr_node_ids*sizeof(unsigned long);
+				    NR_NUMA_HINT_FAULT_STATS *
+				    nr_node_ids * sizeof(unsigned long);
 
 		grp = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
 		if (!grp)

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [tip: sched/core] sched/numa: Fix a few comments
  2021-10-04 10:57 ` [PATCH 3/4] sched/numa: Fix a few comments Bharata B Rao
  2021-10-05  8:22   ` Mel Gorman
  2021-10-09 10:07   ` [tip: sched/core] " tip-bot2 for Bharata B Rao
@ 2021-10-14 11:16   ` tip-bot2 for Bharata B Rao
  2 siblings, 0 replies; 17+ messages in thread
From: tip-bot2 for Bharata B Rao @ 2021-10-14 11:16 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Bharata B Rao, Peter Zijlstra (Intel), Mel Gorman, x86, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     7d380f24fe662033fd21a65f678057abd293f76e
Gitweb:        https://git.kernel.org/tip/7d380f24fe662033fd21a65f678057abd293f76e
Author:        Bharata B Rao <bharata@amd.com>
AuthorDate:    Mon, 04 Oct 2021 16:27:05 +05:30
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 14 Oct 2021 13:09:58 +02:00

sched/numa: Fix a few comments

Fix a few comments to help understand them better.

Signed-off-by: Bharata B Rao <bharata@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: https://lkml.kernel.org/r/20211004105706.3669-4-bharata@amd.com
---
 kernel/sched/fair.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cfbd5ef..87db481 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2074,7 +2074,7 @@ static void numa_migrate_preferred(struct task_struct *p)
 }
 
 /*
- * Find out how many nodes on the workload is actively running on. Do this by
+ * Find out how many nodes the workload is actively running on. Do this by
  * tracking the nodes from which NUMA hinting faults are triggered. This can
  * be different from the set of nodes where the workload's memory is currently
  * located.
@@ -2128,7 +2128,7 @@ static void update_task_scan_period(struct task_struct *p,
 
 	/*
 	 * If there were no record hinting faults then either the task is
-	 * completely idle or all activity is areas that are not of interest
+	 * completely idle or all activity is in areas that are not of interest
 	 * to automatic numa balancing. Related to that, if there were failed
 	 * migration then it implies we are migrating too quickly or the local
 	 * node is overloaded. In either case, scan slower

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [tip: sched/core] sched/numa: Remove the redundant member numa_group::fault_cpus
  2021-10-04 10:57 ` [PATCH 2/4] sched/numa: Remove the redundant member numa_group::fault_cpus Bharata B Rao
  2021-10-05  8:21   ` Mel Gorman
  2021-10-09 10:07   ` [tip: sched/core] " tip-bot2 for Bharata B Rao
@ 2021-10-14 11:16   ` tip-bot2 for Bharata B Rao
  2 siblings, 0 replies; 17+ messages in thread
From: tip-bot2 for Bharata B Rao @ 2021-10-14 11:16 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Bharata B Rao, Peter Zijlstra (Intel), Mel Gorman, x86, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     5b763a14a5164e4c442e99d186fb39dac489e49b
Gitweb:        https://git.kernel.org/tip/5b763a14a5164e4c442e99d186fb39dac489e49b
Author:        Bharata B Rao <bharata@amd.com>
AuthorDate:    Mon, 04 Oct 2021 16:27:04 +05:30
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 14 Oct 2021 13:09:58 +02:00

sched/numa: Remove the redundant member numa_group::fault_cpus

numa_group::fault_cpus is actually a pointer to the region
in numa_group::faults[] where NUMA_CPU stats are located.

Remove this redundant member and use numa_group::faults[NUMA_CPU]
directly like it is done for similar per-process numa fault stats.

There is no functionality change due to this commit.

Signed-off-by: Bharata B Rao <bharata@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: https://lkml.kernel.org/r/20211004105706.3669-3-bharata@amd.com
---
 kernel/sched/fair.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fc0a0ed..cfbd5ef 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1038,11 +1038,12 @@ struct numa_group {
 	unsigned long total_faults;
 	unsigned long max_faults_cpu;
 	/*
+	 * faults[] array is split into two regions: faults_mem and faults_cpu.
+	 *
 	 * Faults_cpu is used to decide whether memory should move
 	 * towards the CPU. As a consequence, these stats are weighted
 	 * more by CPU use than by memory faults.
 	 */
-	unsigned long *faults_cpu;
 	unsigned long faults[];
 };
 
@@ -1216,8 +1217,8 @@ static inline unsigned long group_faults(struct task_struct *p, int nid)
 
 static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
 {
-	return group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 0)] +
-		group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)];
+	return group->faults[task_faults_idx(NUMA_CPU, nid, 0)] +
+		group->faults[task_faults_idx(NUMA_CPU, nid, 1)];
 }
 
 static inline unsigned long group_faults_priv(struct numa_group *ng)
@@ -2384,7 +2385,7 @@ static void task_numa_placement(struct task_struct *p)
 				 * is at the beginning of the numa_faults array.
 				 */
 				ng->faults[mem_idx] += diff;
-				ng->faults_cpu[mem_idx] += f_diff;
+				ng->faults[cpu_idx] += f_diff;
 				ng->total_faults += diff;
 				group_faults += ng->faults[mem_idx];
 			}
@@ -2450,9 +2451,6 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
 		grp->max_faults_cpu = 0;
 		spin_lock_init(&grp->lock);
 		grp->gid = p->pid;
-		/* Second half of the array tracks nids where faults happen */
-		grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES *
-						nr_node_ids;
 
 		for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
 			grp->faults[i] = p->numa_faults[i];

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [tip: sched/core] sched/numa: Replace hard-coded number by a define in numa_task_group()
  2021-10-04 10:57 ` [PATCH 1/4] sched/numa: Replace hard-coded number by a define in numa_task_group() Bharata B Rao
  2021-10-05  8:18   ` Mel Gorman
  2021-10-09 10:07   ` [tip: sched/core] " tip-bot2 for Bharata B Rao
@ 2021-10-14 11:16   ` tip-bot2 for Bharata B Rao
  2 siblings, 0 replies; 17+ messages in thread
From: tip-bot2 for Bharata B Rao @ 2021-10-14 11:16 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Bharata B Rao, Peter Zijlstra (Intel), Mel Gorman, x86, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     7a2341fc1fec0b8b3580be4226ea244756d3a1b3
Gitweb:        https://git.kernel.org/tip/7a2341fc1fec0b8b3580be4226ea244756d3a1b3
Author:        Bharata B Rao <bharata@amd.com>
AuthorDate:    Mon, 04 Oct 2021 16:27:03 +05:30
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 14 Oct 2021 13:09:58 +02:00

sched/numa: Replace hard-coded number by a define in numa_task_group()

While allocating group fault stats, task_numa_group()
is using a hard coded number 4. Replace this by
NR_NUMA_HINT_FAULT_STATS.

No functionality change in this commit.

Signed-off-by: Bharata B Rao <bharata@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: https://lkml.kernel.org/r/20211004105706.3669-2-bharata@amd.com
---
 kernel/sched/fair.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2468d1d..fc0a0ed 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2438,7 +2438,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
 
 	if (unlikely(!deref_curr_numa_group(p))) {
 		unsigned int size = sizeof(struct numa_group) +
-				    4*nr_node_ids*sizeof(unsigned long);
+				    NR_NUMA_HINT_FAULT_STATS *
+				    nr_node_ids * sizeof(unsigned long);
 
 		grp = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
 		if (!grp)

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2021-10-14 11:16 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-04 10:57 [PATCH 0/4] A few autonuma cleanups Bharata B Rao
2021-10-04 10:57 ` [PATCH 1/4] sched/numa: Replace hard-coded number by a define in numa_task_group() Bharata B Rao
2021-10-05  8:18   ` Mel Gorman
2021-10-09 10:07   ` [tip: sched/core] " tip-bot2 for Bharata B Rao
2021-10-14 11:16   ` tip-bot2 for Bharata B Rao
2021-10-04 10:57 ` [PATCH 2/4] sched/numa: Remove the redundant member numa_group::fault_cpus Bharata B Rao
2021-10-05  8:21   ` Mel Gorman
2021-10-09 10:07   ` [tip: sched/core] " tip-bot2 for Bharata B Rao
2021-10-14 11:16   ` tip-bot2 for Bharata B Rao
2021-10-04 10:57 ` [PATCH 3/4] sched/numa: Fix a few comments Bharata B Rao
2021-10-05  8:22   ` Mel Gorman
2021-10-09 10:07   ` [tip: sched/core] " tip-bot2 for Bharata B Rao
2021-10-14 11:16   ` tip-bot2 for Bharata B Rao
2021-10-04 10:57 ` [RFC PATCH 4/4] sched/numa: Don't update mm->numa_next_scan from fault path Bharata B Rao
2021-10-05  8:23   ` Mel Gorman
2021-10-05  9:10     ` Bharata B Rao
2021-10-07 10:25       ` Mel Gorman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).