LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH sched-devel 1/7] cpuisol: Make cpu isolation configrable and export isolated map
@ 2008-02-22 21:09 Max Krasnyansky
  2008-02-22 21:09 ` [PATCH sched-devel 2/7] cpuisol: Do not route IRQs to the CPUs isolated at boot Max Krasnyansky
  0 siblings, 1 reply; 7+ messages in thread
From: Max Krasnyansky @ 2008-02-22 21:09 UTC (permalink / raw)
  To: mingo; +Cc: linux-kernel, a.p.zijlstra, pj, Max Krasnyansky

This simple patch introduces a new config option for CPU isolation.
I created a separate Kconfig file here because more
options will be added by the following patches.

The patch also exports cpu_isolated_map, provides cpu_isolated()
accessor macro and provides access to the isolation bit via sysfs.
In other words cpu_isolated_map is exposed to the rest of the kernel
and the user-space in much the same way cpu_online_map is exposed today.

While at it I also moved cpu_*_map from kernel/sched.c into kernel/cpu.c.
Those maps have very little to do with the scheduler these days and
therefore seem out of place in the scheduler code.

This patch does not change/affect any existing scheduler functionality.

Signed-off-by: Max Krasnyansky <maxk@qualcomm.com>
---
 arch/x86/Kconfig        |    1 +
 drivers/base/cpu.c      |   48 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/cpumask.h |    3 ++
 kernel/Kconfig.cpuisol  |   15 ++++++++++++++
 kernel/Makefile         |    4 +-
 kernel/cpu.c            |   49 +++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c          |   36 ----------------------------------
 7 files changed, 118 insertions(+), 38 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3be2305..d228488 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -526,6 +526,7 @@ config SCHED_MC
 	  increased overhead in some places. If unsure say N here.
 
 source "kernel/Kconfig.preempt"
+source "kernel/Kconfig.cpuisol"
 
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 499b003..b6c5e0f 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -55,10 +55,58 @@ static ssize_t store_online(struct sys_device *dev, const char *buf,
 }
 static SYSDEV_ATTR(online, 0644, show_online, store_online);
 
+#ifdef CONFIG_CPUISOL
+/*
+ * This is under config hotplug because in order to 
+ * dynamically isolate a CPU it needs to be brought off-line first.
+ * In other words the sequence is
+ *   echo 0 > /sys/device/system/cpuN/online
+ *   echo 1 > /sys/device/system/cpuN/isolated
+ *   echo 1 > /sys/device/system/cpuN/online
+ */
+static ssize_t show_isol(struct sys_device *dev, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+	return sprintf(buf, "%u\n", !!cpu_isolated(cpu->sysdev.id));
+}
+
+static ssize_t store_isol(struct sys_device *dev, const char *buf,
+			    size_t count)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+	ssize_t ret = 0;
+
+	if (cpu_online(cpu->sysdev.id))
+		return -EBUSY;
+
+	switch (buf[0]) {
+	case '0':
+		cpu_clear(cpu->sysdev.id, cpu_isolated_map);
+		break;
+	case '1':
+		cpu_set(cpu->sysdev.id, cpu_isolated_map);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret >= 0)
+		ret = count;
+	return ret;
+}
+static SYSDEV_ATTR(isolated, 0600, show_isol, store_isol);
+#endif /* CONFIG_CPUISOL */
+
 static void __devinit register_cpu_control(struct cpu *cpu)
 {
 	sysdev_create_file(&cpu->sysdev, &attr_online);
+
+#ifdef CONFIG_CPUISOL
+	sysdev_create_file(&cpu->sysdev, &attr_isolated);
+#endif
 }
+
 void unregister_cpu(struct cpu *cpu)
 {
 	int logical_cpu = cpu->sysdev.id;
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 7047f58..cde2964 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -380,6 +380,7 @@ static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp,
 extern cpumask_t cpu_possible_map;
 extern cpumask_t cpu_online_map;
 extern cpumask_t cpu_present_map;
+extern cpumask_t cpu_isolated_map;
 
 #if NR_CPUS > 1
 #define num_online_cpus()	cpus_weight(cpu_online_map)
@@ -388,6 +389,7 @@ extern cpumask_t cpu_present_map;
 #define cpu_online(cpu)		cpu_isset((cpu), cpu_online_map)
 #define cpu_possible(cpu)	cpu_isset((cpu), cpu_possible_map)
 #define cpu_present(cpu)	cpu_isset((cpu), cpu_present_map)
+#define cpu_isolated(cpu)	cpu_isset((cpu), cpu_isolated_map)
 #else
 #define num_online_cpus()	1
 #define num_possible_cpus()	1
@@ -395,6 +397,7 @@ extern cpumask_t cpu_present_map;
 #define cpu_online(cpu)		((cpu) == 0)
 #define cpu_possible(cpu)	((cpu) == 0)
 #define cpu_present(cpu)	((cpu) == 0)
+#define cpu_isolated(cpu)	(0)
 #endif
 
 #define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))
diff --git a/kernel/Kconfig.cpuisol b/kernel/Kconfig.cpuisol
new file mode 100644
index 0000000..e606477
--- /dev/null
+++ b/kernel/Kconfig.cpuisol
@@ -0,0 +1,15 @@
+config CPUISOL
+	depends on SMP
+	bool "CPU isolation"
+	help
+	  This option enables support for CPU isolation.
+	  If enabled the kernel will try to avoid kernel activity on the isolated CPUs.
+	  By default user-space threads are not scheduled on the isolated CPUs unless 
+	  they explicitly request it (via sched_ and pthread_ affinity calls). Isolated
+	  CPUs are not subject to the scheduler load-balancing algorithms.
+	  
+	  CPUs can be marked as isolated using 'isolcpus=' command line option or by 
+	  writing '1' into /sys/devices/system/cpu/cpuN/isolated.
+	  
+	  This feature is useful for hard realtime and high performance applications.
+	  If unsure say 'N'.
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c584c5..bb8da0a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
-	    exit.o itimer.o time.o softirq.o resource.o \
+	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
@@ -27,7 +27,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
 obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
-obj-$(CONFIG_SMP) += cpu.o spinlock.o
+obj-$(CONFIG_SMP) += spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2eff3f6..a0ac386 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -15,6 +15,36 @@
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
 
+/*
+ * Represents all cpu's present in the system
+ * In systems capable of hotplug, this map could dynamically grow
+ * as new cpu's are detected in the system via any platform specific
+ * method, such as ACPI for e.g.
+ */
+
+cpumask_t cpu_present_map __read_mostly;
+EXPORT_SYMBOL(cpu_present_map);
+
+/*
+ * Represents isolated cpu's.
+ * In general any kernel activity should be avoided as much as possible
+ * on these cpu's. Isolated cpu's are not load balanced by the scheduler. 
+ */
+cpumask_t cpu_isolated_map __read_mostly = CPU_MASK_NONE;
+EXPORT_SYMBOL(cpu_isolated_map);
+
+#ifndef CONFIG_SMP
+
+cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_online_map);
+
+cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_possible_map);
+
+#endif
+
+#ifdef CONFIG_SMP
+
 /* Serializes the updates to cpu_online_map, cpu_present_map */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 
@@ -413,3 +443,22 @@ out:
 	cpu_maps_update_done();
 }
 #endif /* CONFIG_PM_SLEEP_SMP */
+
+#ifdef CONFIG_CPUISOL
+/* Setup the mask of isolated cpus */
+static int __init isolated_cpu_setup(char *str)
+{
+	int ints[NR_CPUS], i;
+
+	str = get_options(str, ARRAY_SIZE(ints), ints);
+	cpus_clear(cpu_isolated_map);
+	for (i = 1; i <= ints[0]; i++)
+		if (ints[i] < NR_CPUS)
+			cpu_set(ints[i], cpu_isolated_map);
+	return 1;
+}
+
+__setup("isolcpus=", isolated_cpu_setup);
+#endif
+
+#endif /* CONFIG_SMP */
diff --git a/kernel/sched.c b/kernel/sched.c
index f28f19e..10a533e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4839,24 +4839,6 @@ asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
 	return sched_setaffinity(pid, new_mask);
 }
 
-/*
- * Represents all cpu's present in the system
- * In systems capable of hotplug, this map could dynamically grow
- * as new cpu's are detected in the system via any platform specific
- * method, such as ACPI for e.g.
- */
-
-cpumask_t cpu_present_map __read_mostly;
-EXPORT_SYMBOL(cpu_present_map);
-
-#ifndef CONFIG_SMP
-cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_online_map);
-
-cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_possible_map);
-#endif
-
 long sched_getaffinity(pid_t pid, cpumask_t *mask)
 {
 	struct task_struct *p;
@@ -6212,24 +6194,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 	rcu_assign_pointer(rq->sd, sd);
 }
 
-/* cpus with isolated domains */
-static cpumask_t cpu_isolated_map = CPU_MASK_NONE;
-
-/* Setup the mask of cpus configured for isolated domains */
-static int __init isolated_cpu_setup(char *str)
-{
-	int ints[NR_CPUS], i;
-
-	str = get_options(str, ARRAY_SIZE(ints), ints);
-	cpus_clear(cpu_isolated_map);
-	for (i = 1; i <= ints[0]; i++)
-		if (ints[i] < NR_CPUS)
-			cpu_set(ints[i], cpu_isolated_map);
-	return 1;
-}
-
-__setup("isolcpus=", isolated_cpu_setup);
-
 /*
  * init_sched_build_groups takes the cpumask we wish to span, and a pointer
  * to a function which identifies what group(along with sched group) a CPU
-- 
1.5.4.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH sched-devel 2/7] cpuisol: Do not route IRQs to the CPUs isolated at boot
  2008-02-22 21:09 [PATCH sched-devel 1/7] cpuisol: Make cpu isolation configrable and export isolated map Max Krasnyansky
@ 2008-02-22 21:09 ` Max Krasnyansky
  2008-02-22 21:09   ` [PATCH sched-devel 3/7] cpuisol: Do not schedule workqueues on the isolated CPUs Max Krasnyansky
  0 siblings, 1 reply; 7+ messages in thread
From: Max Krasnyansky @ 2008-02-22 21:09 UTC (permalink / raw)
  To: mingo; +Cc: linux-kernel, a.p.zijlstra, pj, Max Krasnyansky

Most people would expect isolated CPUs to not get any
IRQs by default. This happens naturally if a CPU is brought
off-line, marked isolated and then brought back online.

There was some confusion about this patch originally. So I wanted
to clarify that it does not completely disable IRQ handling on
the isolated CPUs. Users still have the option of routing IRQs
to them by modifying the IRQ affinity mask.

I cannot test other archs hence the patch is for x86_64 only.

Signed-off-by: Max Krasnyansky <maxk@qualcomm.com>
---
 arch/x86/kernel/genapic_flat_64.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 07352b7..e02e58c 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -21,7 +21,9 @@
 
 static cpumask_t flat_target_cpus(void)
 {
-	return cpu_online_map;
+	cpumask_t target;
+	cpus_andnot(target, cpu_online_map, cpu_isolated_map);
+	return target;
 }
 
 static cpumask_t flat_vector_allocation_domain(int cpu)
-- 
1.5.4.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH sched-devel 3/7] cpuisol: Do not schedule workqueues on the isolated CPUs
  2008-02-22 21:09 ` [PATCH sched-devel 2/7] cpuisol: Do not route IRQs to the CPUs isolated at boot Max Krasnyansky
@ 2008-02-22 21:09   ` Max Krasnyansky
  2008-02-22 21:09     ` [PATCH sched-devel 4/7] cpuisol: Move on-stack array used for boot cmd parsing into __initdata Max Krasnyansky
  0 siblings, 1 reply; 7+ messages in thread
From: Max Krasnyansky @ 2008-02-22 21:09 UTC (permalink / raw)
  To: mingo; +Cc: linux-kernel, a.p.zijlstra, pj, Max Krasnyansky

This patch addresses the use case where a high priority realtime (FIFO, RR) user-space
thread is using 100% CPU for extended periods of time. In that case kernel workqueue
threads do not get a chance to run and the entire machine essentially hangs because other CPUs
are waiting for scheduled workqueues to flush.

This use case is perfectly valid if one is using a CPU as a dedicated engine
(crunching numbers, hard realtime, etc). Think of it as an SPE in the Cell processor.
That is what CPU isolation enables in the first place.

Most kernel subsystems do not rely on the per CPU workqueues. In fact we already
have support for single threaded workqueues, this patch just makes it automatic.
As mentioned in the introductory email this functionality has been tested on a wide
range of full fledged systems (with IDE, SATA, USB, automount, NFS, NUMA, etc) in the
production environment.

The only feature (that I know of) that does not work when workqueue isolation is enabled is
OProfile. It does not result in crashes or instability, OProfile is just unable to collect
stats from the isolated CPUs. Hence this feature is marked as experimental.

There is zero overhead if workqueue isolation is disabled.

Signed-off-by: Max Krasnyansky <maxk@qualcomm.com>
---
 kernel/Kconfig.cpuisol |    9 +++++++++
 kernel/workqueue.c     |   30 +++++++++++++++++++++++-------
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/kernel/Kconfig.cpuisol b/kernel/Kconfig.cpuisol
index e606477..81f1972 100644
--- a/kernel/Kconfig.cpuisol
+++ b/kernel/Kconfig.cpuisol
@@ -13,3 +13,12 @@ config CPUISOL
 	  
 	  This feature is useful for hard realtime and high performance applications.
 	  If unsure say 'N'.
+
+config CPUISOL_WORKQUEUE
+	bool "Do not schedule workqueues on the isolated CPUs (EXPERIMENTAL)"
+	depends on CPUISOL && EXPERIMENTAL
+	help
+	  In this option is enabled kernel will not schedule workqueues on the 
+	  isolated CPUs.
+	  Please note that at this point this feature is experimental. It brakes 
+	  certain things like OProfile that heavily rely on per cpu workqueues.
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ff06611..f48e13c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -35,6 +35,16 @@
 #include <linux/lockdep.h>
 
 /*
+ * Stub out cpu_isolated() if isolated CPUs are allowed to 
+ * run workqueues.
+ */
+#ifdef CONFIG_CPUISOL_WORKQUEUE
+#define cpu_unusable(cpu) cpu_isolated(cpu)
+#else
+#define cpu_unusable(cpu) (0)
+#endif
+
+/*
  * The per-CPU workqueue (if single thread, we always use the first
  * possible cpu).
  */
@@ -97,7 +107,7 @@ static const cpumask_t *wq_cpu_map(struct workqueue_struct *wq)
 static
 struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
 {
-	if (unlikely(is_single_threaded(wq)))
+	if (unlikely(is_single_threaded(wq)) || cpu_unusable(cpu))
 		cpu = singlethread_cpu;
 	return per_cpu_ptr(wq->cpu_wq, cpu);
 }
@@ -229,9 +239,11 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 		timer->data = (unsigned long)dwork;
 		timer->function = delayed_work_timer_fn;
 
-		if (unlikely(cpu >= 0))
+		if (unlikely(cpu >= 0)) {
+			if (cpu_unusable(cpu))
+				cpu = singlethread_cpu;
 			add_timer_on(timer, cpu);
-		else
+		} else
 			add_timer(timer);
 		ret = 1;
 	}
@@ -605,7 +617,8 @@ int schedule_on_each_cpu(work_func_t func)
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
 		struct work_struct *work = per_cpu_ptr(works, cpu);
-
+		if (cpu_unusable(cpu))
+			continue;
 		INIT_WORK(work, func);
 		set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
 		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
@@ -754,7 +767,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 
 		for_each_possible_cpu(cpu) {
 			cwq = init_cpu_workqueue(wq, cpu);
-			if (err || !cpu_online(cpu))
+			if (err || !cpu_online(cpu) || cpu_unusable(cpu))
 				continue;
 			err = create_workqueue_thread(cwq, cpu);
 			start_workqueue_thread(cwq, cpu);
@@ -833,8 +846,11 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 	struct cpu_workqueue_struct *cwq;
 	struct workqueue_struct *wq;
 
-	action &= ~CPU_TASKS_FROZEN;
+	if (cpu_unusable(cpu))
+		return NOTIFY_OK;
 
+	action &= ~CPU_TASKS_FROZEN;
+	
 	switch (action) {
 
 	case CPU_UP_PREPARE:
@@ -869,7 +885,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 
 void __init init_workqueues(void)
 {
-	cpu_populated_map = cpu_online_map;
+	cpus_andnot(cpu_populated_map, cpu_online_map, cpu_isolated_map);
 	singlethread_cpu = first_cpu(cpu_possible_map);
 	cpu_singlethread_map = cpumask_of_cpu(singlethread_cpu);
 	hotcpu_notifier(workqueue_cpu_callback, 0);
-- 
1.5.4.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH sched-devel 4/7] cpuisol: Move on-stack array used for boot cmd parsing into __initdata
  2008-02-22 21:09   ` [PATCH sched-devel 3/7] cpuisol: Do not schedule workqueues on the isolated CPUs Max Krasnyansky
@ 2008-02-22 21:09     ` Max Krasnyansky
  2008-02-22 21:09       ` [PATCH sched-devel 5/7] cpuisol: Documentation updates Max Krasnyansky
  0 siblings, 1 reply; 7+ messages in thread
From: Max Krasnyansky @ 2008-02-22 21:09 UTC (permalink / raw)
  To: mingo; +Cc: linux-kernel, a.p.zijlstra, pj, Max Krasnyansky

Suggested by Andrew Morton:

  isolated_cpu_setup() has an on-stack array of NR_CPUS integers.  This
  will consume 4k of stack on ia64 (at least).  We'll just squeak through
  for a little while, but this needs to be fixed.  Just move it into
  __initdata.

Signed-off-by: Max Krasnyansky <maxk@qualcomm.com>
---
 kernel/cpu.c |   15 ++++++++++-----
 1 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index a0ac386..b3af739 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -446,15 +446,20 @@ out:
 
 #ifdef CONFIG_CPUISOL
 /* Setup the mask of isolated cpus */
+
+static int __initdata isolcpu[NR_CPUS];
+
 static int __init isolated_cpu_setup(char *str)
 {
-	int ints[NR_CPUS], i;
+	int i, n;
+
+	str = get_options(str, ARRAY_SIZE(isolcpu), isolcpu);
+	n   = isolcpu[0];
 
-	str = get_options(str, ARRAY_SIZE(ints), ints);
 	cpus_clear(cpu_isolated_map);
-	for (i = 1; i <= ints[0]; i++)
-		if (ints[i] < NR_CPUS)
-			cpu_set(ints[i], cpu_isolated_map);
+	for (i = 1; i <= n; i++)
+		if (isolcpu[i] < NR_CPUS)
+			cpu_set(isolcpu[i], cpu_isolated_map);
 	return 1;
 }
 
-- 
1.5.4.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH sched-devel 5/7] cpuisol: Documentation updates
  2008-02-22 21:09     ` [PATCH sched-devel 4/7] cpuisol: Move on-stack array used for boot cmd parsing into __initdata Max Krasnyansky
@ 2008-02-22 21:09       ` Max Krasnyansky
  2008-02-22 21:09         ` [PATCH sched-devel 6/7] cpuisol: Minor updates to the Kconfig options Max Krasnyansky
  0 siblings, 1 reply; 7+ messages in thread
From: Max Krasnyansky @ 2008-02-22 21:09 UTC (permalink / raw)
  To: mingo; +Cc: linux-kernel, a.p.zijlstra, pj, Max Krasnyansky

Documented sysfs interface as suggested by Andrew Morton.
Added general documentation that describes how to configure
and use CPU isolation features.

Signed-off-by: Max Krasnyansky <maxk@qualcomm.com>
---
 Documentation/ABI/testing/sysfs-devices-system-cpu |   41 +++++++
 Documentation/cpu-isolation.txt                    |  113 ++++++++++++++++++++
 2 files changed, 154 insertions(+), 0 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
new file mode 100644
index 0000000..32dde5b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -0,0 +1,41 @@
+What:           /sys/devices/system/cpu/...
+Date:           Feb. 2008
+KernelVersion:  2.6.24 
+Contact:        LKML <linux-kernel@vger.kernel.org>
+Description:
+
+The /sys/devices/system/cpu tree provides information about all cpu's
+known to the running kernel.
+
+Following files are created for each cpu. 'N' is the cpu number.
+
+/sys/devices/system/cpu/cpuN/
+  online     (0644) On-line attribute. Indicates whether the cpu is on-line.
+                    The cpu can be brought off-line by writing '0' into
+                    this file.  Similarly it can be brought back on-line
+                    by writing '1' into this file.  This attribute is
+                    not available for the cpu's that cannot be brought
+                    off-line. Typically cpu0.  For more information see
+                            Documentation/cpu-hotplug.txt
+
+  isolated   (0644) Isolation attribute. Indicates whether the cpu
+                    is isolated.
+                    The cpu can be isolated by writing '1' into this
+                    file.  Similarly it can be un-isolated by writing
+                    '0' into this file.  In order to isolate the cpu it
+                    must first be brought off-line.  This attribute is
+                    not available for the cpu's that cannot be brought
+                    off-line. Typically cpu0.  
+                    Note this attribute is present only if "CPU isolation"
+                    is enabled. For more information see
+                            Documentation/cpu-isolation.txt
+
+  cpufreq    (0755) Frequency scaling state.
+                    For more info see
+                            Documentation/cpu-freq/...
+
+  cache      (0755) Cache information. FIXME
+
+  cpuidle    (0755) Idle state information. FIXME
+
+  topology   (0755) Topology information. FIXME
diff --git a/Documentation/cpu-isolation.txt b/Documentation/cpu-isolation.txt
new file mode 100644
index 0000000..b9ca425
--- /dev/null
+++ b/Documentation/cpu-isolation.txt
@@ -0,0 +1,113 @@
+CPU isolation support in Linux(tm) Kernel
+
+Maintainers:
+
+Scheduler and scheduler domain bits:
+	Ingo Molnar <mingo@elte.hu>
+
+General framework, irq and workqueue isolation:
+	Max Krasnyanskiy <maxk@qualcomm.com>
+
+ChangeLog:
+- Initial version. Feb 2008, MaxK
+
+Introduction
+------------
+
+The primary idea behind CPU isolation is the ability to use some CPU cores
+as a dedicated engines for running user-space code with minimal kernel
+overhead/intervention, think of it as an SPE in the Cell processor. For
+example CPU isolation allows for running CPU intensive(100%) RT task
+on one of the processors without adversely affecting or being affected
+by the other system activities.  With the current (as of early 2008)
+multi-core CPU trend we may see more and more applications that explore
+this capability: real-time gaming engines, simulators, hard real-time
+apps, etc.
+
+Current CPU isolation support consists of the following features:
+
+1. Isolated CPU(s) are excluded from the scheduler load balancing logic.
+   Applications must explicitly bind threads in order to run on those
+   CPU(s).
+
+2. By default interrupts are not routed to the isolated CPU(s).
+   Users must route interrupts (if any) to those CPU(s) explicitly.
+
+3. Kernel avoids any activity on the isolated CPU(s) as much as possible.
+   This includes workqueues, per CPU threads, etc.  Please note that
+   this feature is optional and is disabled by default.
+
+Kernel configuration options
+----------------------------
+
+Following options need to be enabled in order to use CPU isolation
+   CONFIG_CPUISOL		Top-level config option. Enables general
+                                CPU isolation framework and enables features 
+                                #1 and #2 described above.
+
+   CONFIG_CPUISOL_WORKQUEUE	These options provide deeper isolation
+   CONFIG_CPUISOL_STOPMACHINE   from various kernel subsystems. They implement 
+   CONFIG_CPUISOL_...           feature #3 described above.  
+                                See Kconfig help for more information on each 
+                                individual option.
+
+How to isolate a CPU
+--------------------
+
+There are two ways for isolating a CPU
+
+Kernel boot command line:
+   isolcpus=n0,n1,...
+
+   This option enables isolation for all CPU(s) specified in the comma 
+   separated list.
+   Example:
+        isolcpus=1,5
+
+Sysfs interface:
+   In order to isolate a CPU through this mechanism it must first be
+   brought off-line.  Hence the command sequence looks like:
+       echo 0 > /sys/devices/system/cpu/cpuN/online
+       echo 1 > /sys/devices/system/cpu/cpuN/isolated
+       echo 1 > /sys/devices/system/cpu/cpuN/online
+
+
+Kernel interfaces
+-----------------
+
+cpu_isolated_map  - Bitmap of all isolated CPU(s).
+		    This bitmap is updated either by isolcpus= command
+		    line option or by writing to sysfs attributes. In
+		    the latter case isolation bit can be updated only
+		    when the corresponding CPU is off-line.
+
+cpu_isolated(N)   - Use that function to check whether CPU N is isolated.
+
+
+User-space application interfaces
+---------------------------------
+
+From the user-space application perspective isolated CPU(s) are no
+different from any other CPU in the system. Once the required CPU(s)
+have been isolated applications can use standard POSIX apis to bind its
+threads to those CPU(s):
+	sched_setaffinity
+	sched_getaffinity
+	pthread_getaffinity_np
+	pthread_setaffinity_np
+
+One thing to keep in mind though is that Linux kernel heavily uses per-CPU
+data structures and mechanisms. For example if a thread running on an
+isolated CPU makes a system call, that system call will be serviced
+on the same CPU.  Some system calls trigger timers, softirq and other
+kernel activities. Those will likely also run on the same CPU where
+system call was originally serviced.  If this behavior is undesirable
+threads running on the isolated CPU(s) should avoid using certain system
+calls and instead delegate that to the threads running on non-isolated
+CPU(s). For example you probably do not want to write to the filesystem
+or the console. Generally, time, mutex and pthread related system calls
+are perfectly safe.
+
+If you intend to use CPU isolation for real-time applications please
+check out rt wiki at
+	http://rt.wiki.kernel.org/index.php/Main_Page
-- 
1.5.4.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH sched-devel 6/7] cpuisol: Minor updates to the Kconfig options
  2008-02-22 21:09       ` [PATCH sched-devel 5/7] cpuisol: Documentation updates Max Krasnyansky
@ 2008-02-22 21:09         ` Max Krasnyansky
  2008-02-22 21:09           ` [PATCH sched-devel 7/7] cpuisol: Do not halt isolated CPUs with Stop Machine Max Krasnyansky
  0 siblings, 1 reply; 7+ messages in thread
From: Max Krasnyansky @ 2008-02-22 21:09 UTC (permalink / raw)
  To: mingo; +Cc: linux-kernel, a.p.zijlstra, pj, Max Krasnyansky

Fixed a couple of typos, long lines and referred to the documentation file.

Signed-off-by: Max Krasnyansky <maxk@qualcomm.com>
---
 kernel/Kconfig.cpuisol |   31 +++++++++++++++++--------------
 1 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/kernel/Kconfig.cpuisol b/kernel/Kconfig.cpuisol
index 81f1972..e681b02 100644
--- a/kernel/Kconfig.cpuisol
+++ b/kernel/Kconfig.cpuisol
@@ -2,23 +2,26 @@ config CPUISOL
 	depends on SMP
 	bool "CPU isolation"
 	help
-	  This option enables support for CPU isolation.
-	  If enabled the kernel will try to avoid kernel activity on the isolated CPUs.
-	  By default user-space threads are not scheduled on the isolated CPUs unless 
-	  they explicitly request it (via sched_ and pthread_ affinity calls). Isolated
-	  CPUs are not subject to the scheduler load-balancing algorithms.
-	  
-	  CPUs can be marked as isolated using 'isolcpus=' command line option or by 
-	  writing '1' into /sys/devices/system/cpu/cpuN/isolated.
-	  
-	  This feature is useful for hard realtime and high performance applications.
+	  This option enables support for CPU isolation. If enabled the
+	  kernel will try to avoid kernel activity on the isolated CPUs.
+	  By default user-space threads are not scheduled on the isolated
+	  CPUs unless they explicitly request it via sched_setaffinity()
+	  and pthread_setaffinity_np() calls. Isolated CPUs are not
+	  subject to the scheduler load-balancing algorithms.
+
+	  This feature is useful for hard realtime and high performance
+	  applications.
+	  See Documentation/cpu-isolation.txt for more details.
+
 	  If unsure say 'N'.
 
 config CPUISOL_WORKQUEUE
 	bool "Do not schedule workqueues on the isolated CPUs (EXPERIMENTAL)"
 	depends on CPUISOL && EXPERIMENTAL
 	help
-	  In this option is enabled kernel will not schedule workqueues on the 
-	  isolated CPUs.
-	  Please note that at this point this feature is experimental. It brakes 
-	  certain things like OProfile that heavily rely on per cpu workqueues.
+	  If this option is enabled kernel will not schedule workqueues on
+	  the isolated CPUs.  Please note that at this point this feature
+	  is experimental. It breaks certain things like OProfile that
+	  heavily rely on per cpu workqueues.
+
+	  Say 'Y' to enable workqueue isolation.  If unsure say 'N'.
-- 
1.5.4.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH sched-devel 7/7] cpuisol: Do not halt isolated CPUs with Stop Machine
  2008-02-22 21:09         ` [PATCH sched-devel 6/7] cpuisol: Minor updates to the Kconfig options Max Krasnyansky
@ 2008-02-22 21:09           ` Max Krasnyansky
  0 siblings, 0 replies; 7+ messages in thread
From: Max Krasnyansky @ 2008-02-22 21:09 UTC (permalink / raw)
  To: mingo; +Cc: linux-kernel, a.p.zijlstra, pj, Max Krasnyansky

This patch makes "stop machine" ignore isolated CPUs (if the config option is enabled).

It addresses the exact same use case explained in the previous workqueue isolation patch,
where a user-space RT thread can prevent stop machine threads from running, which causes
the entire system to hang.

Stop machine is particularly bad when it comes to latencies because it halts every single
CPU and may take several milliseconds to complete. It's currently used for module insertion
and removal only.
As some folks pointed out in the previous discussions this patch is potentially unsafe
if applications running on the isolated CPUs use kernel services affected by the module
insertion and removal.
I've been running kernels with this patch on a wide range of machines in a production
environment where we routinely insert/remove modules with applications running on isolated
CPUs. Also I've recently done quite a bit of testing on live multi-core systems with
"stop machine" _completely_ disabled, and was not able to trigger any problems.
For more details please see this thread
	http://marc.info/?l=linux-kernel&m=120243837206248&w=2
That of course does not mean that the patch is totally safe but it does not seem to
cause any instability in real life.

This feature does not add any overhead when disabled. It's marked as experimental
due to potential issues mentioned above.

Signed-off-by: Max Krasnyansky <maxk@qualcomm.com>
---
 kernel/Kconfig.cpuisol |   15 +++++++++++++++
 kernel/stop_machine.c  |    8 +++++++-
 2 files changed, 22 insertions(+), 1 deletions(-)

diff --git a/kernel/Kconfig.cpuisol b/kernel/Kconfig.cpuisol
index e681b02..24c1ef0 100644
--- a/kernel/Kconfig.cpuisol
+++ b/kernel/Kconfig.cpuisol
@@ -25,3 +25,18 @@ config CPUISOL_WORKQUEUE
 	  heavily rely on per cpu workqueues.
 
 	  Say 'Y' to enable workqueue isolation.  If unsure say 'N'.
+
+config CPUISOL_STOPMACHINE
+	bool "Do not halt isolated CPUs with Stop Machine (EXPERIMENTAL)"
+	depends on CPUISOL && STOP_MACHINE && EXPERIMENTAL
+	help
+	  If this option is enabled kernel will not halt isolated CPUs
+	  when Stop Machine is triggered. Stop Machine is currently only
+	  used by the module insertion and removal.
+	  Please note that at this point this feature is experimental. It is 
+	  not known to really break anything but can potentially introduce
+	  an instability due to race conditions in module removal logic.
+
+	  Say 'Y' if support for dynamic module insertion and removal is
+	  required for the system that uses isolated CPUs. 
+	  If unsure say 'N'.
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 6f4e0e1..aa3af15 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -89,6 +89,12 @@ static void stopmachine_set_state(enum stopmachine_state state)
 		cpu_relax();
 }
 
+#ifdef CONFIG_CPUISOL_STOPMACHINE
+#define cpu_unusable(cpu) cpu_isolated(cpu)
+#else
+#define cpu_unusable(cpu) (0)
+#endif
+
 static int stop_machine(void)
 {
 	int i, ret = 0;
@@ -98,7 +104,7 @@ static int stop_machine(void)
 	stopmachine_state = STOPMACHINE_WAIT;
 
 	for_each_online_cpu(i) {
-		if (i == raw_smp_processor_id())
+		if (i == raw_smp_processor_id() || cpu_unusable(i))
 			continue;
 		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
 		if (ret < 0)
-- 
1.5.4.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2008-02-22 21:12 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-02-22 21:09 [PATCH sched-devel 1/7] cpuisol: Make cpu isolation configrable and export isolated map Max Krasnyansky
2008-02-22 21:09 ` [PATCH sched-devel 2/7] cpuisol: Do not route IRQs to the CPUs isolated at boot Max Krasnyansky
2008-02-22 21:09   ` [PATCH sched-devel 3/7] cpuisol: Do not schedule workqueues on the isolated CPUs Max Krasnyansky
2008-02-22 21:09     ` [PATCH sched-devel 4/7] cpuisol: Move on-stack array used for boot cmd parsing into __initdata Max Krasnyansky
2008-02-22 21:09       ` [PATCH sched-devel 5/7] cpuisol: Documentation updates Max Krasnyansky
2008-02-22 21:09         ` [PATCH sched-devel 6/7] cpuisol: Minor updates to the Kconfig options Max Krasnyansky
2008-02-22 21:09           ` [PATCH sched-devel 7/7] cpuisol: Do not halt isolated CPUs with Stop Machine Max Krasnyansky

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).