LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH 2/11] Sched clock paravirt op
@ 2007-02-06 3:52 Zachary Amsden
2007-02-06 4:00 ` Zachary Amsden
2007-02-06 12:32 ` Andi Kleen
0 siblings, 2 replies; 8+ messages in thread
From: Zachary Amsden @ 2007-02-06 3:52 UTC (permalink / raw)
To: Linux Kernel Mailing List, Andrew Morton, Andi Kleen,
Rusty Russell, Jeremy Fitzhardinge, Chris Wright, Zachary Amsden
diff -r 3e746c0ebcdf arch/i386/kernel/paravirt.c
--- a/arch/i386/kernel/paravirt.c Fri Feb 02 13:54:53 2007 -0800
+++ b/arch/i386/kernel/paravirt.c Fri Feb 02 15:27:50 2007 -0800
@@ -32,6 +32,7 @@
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/tlbflush.h>
+#include <asm/timer.h>
/* nop stub */
static void native_nop(void)
@@ -523,6 +524,8 @@ struct paravirt_ops paravirt_ops = {
.write_msr = native_write_msr,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
+ .get_scheduled_cycles = native_read_tsc,
+ .get_cpu_khz = native_calculate_cpu_khz,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,
diff -r 3e746c0ebcdf arch/i386/kernel/tsc.c
--- a/arch/i386/kernel/tsc.c Fri Feb 02 13:54:53 2007 -0800
+++ b/arch/i386/kernel/tsc.c Fri Feb 02 13:54:53 2007 -0800
@@ -14,6 +14,7 @@
#include <asm/delay.h>
#include <asm/tsc.h>
#include <asm/io.h>
+#include <asm/timer.h>
#include "mach_timer.h"
@@ -102,9 +103,6 @@ unsigned long long sched_clock(void)
{
unsigned long long this_offset;
- if (unlikely(custom_sched_clock))
- return (*custom_sched_clock)();
-
/*
* Fall back to jiffies if there's no TSC available:
*/
@@ -113,13 +111,13 @@ unsigned long long sched_clock(void)
return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
/* read the Time Stamp Counter: */
- rdtscll(this_offset);
+ get_scheduled_cycles(this_offset);
/* return the value in ns */
return cycles_2_ns(this_offset);
}
-static unsigned long calculate_cpu_khz(void)
+unsigned long native_calculate_cpu_khz(void)
{
unsigned long long start, end;
unsigned long count;
diff -r 3e746c0ebcdf arch/i386/kernel/vmi.c
--- a/arch/i386/kernel/vmi.c Fri Feb 02 13:54:53 2007 -0800
+++ b/arch/i386/kernel/vmi.c Fri Feb 02 15:32:20 2007 -0800
@@ -880,7 +880,7 @@ static int __init activate_vmi(void)
paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
#endif
- custom_sched_clock = vmi_sched_clock;
+ paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
}
/*
diff -r 3e746c0ebcdf include/asm-i386/paravirt.h
--- a/include/asm-i386/paravirt.h Fri Feb 02 13:54:53 2007 -0800
+++ b/include/asm-i386/paravirt.h Fri Feb 02 15:27:50 2007 -0800
@@ -94,6 +94,8 @@ struct paravirt_ops
u64 (fastcall *read_tsc)(void);
u64 (fastcall *read_pmc)(void);
+ u64 (*get_scheduled_cycles)(void);
+ unsigned long (*get_cpu_khz)(void);
void (fastcall *load_tr_desc)(void);
void (fastcall *load_gdt)(const struct Xgt_desc_struct *);
@@ -273,6 +275,9 @@ static inline void halt(void)
#define rdtscll(val) (val = paravirt_ops.read_tsc())
+#define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles())
+#define calculate_cpu_khz() (paravirt_ops.get_cpu_khz())
+
#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
#define rdpmc(counter,low,high) do { \
diff -r 3e746c0ebcdf include/asm-i386/time.h
--- a/include/asm-i386/time.h Fri Feb 02 13:54:53 2007 -0800
+++ b/include/asm-i386/time.h Fri Feb 02 15:27:50 2007 -0800
@@ -30,7 +30,6 @@ static inline int native_set_wallclock(u
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
-extern unsigned long long native_sched_clock(void);
#else /* !CONFIG_PARAVIRT */
#define get_wallclock() native_get_wallclock()
diff -r 3e746c0ebcdf include/asm-i386/timer.h
--- a/include/asm-i386/timer.h Fri Feb 02 13:54:53 2007 -0800
+++ b/include/asm-i386/timer.h Fri Feb 02 13:54:53 2007 -0800
@@ -4,13 +4,21 @@
#include <linux/pm.h>
#define TICK_SIZE (tick_nsec / 1000)
+
void setup_pit_timer(void);
+unsigned long long native_sched_clock(void);
+unsigned long native_calculate_cpu_khz(void);
+
/* Modifiers for buggy PIT handling */
extern int pit_latch_buggy;
extern int timer_ack;
extern int no_timer_check;
-extern unsigned long long (*custom_sched_clock)(void);
extern int no_sync_cmos_clock;
extern int recalibrate_cpu_khz(void);
+#ifndef CONFIG_PARAVIRT
+#define get_scheduled_cycles(val) rdtscll(val)
+#define calculate_cpu_khz() native_calculate_cpu_khz()
#endif
+
+#endif
diff -r 3e746c0ebcdf arch/i386/kernel/vmitime.c
--- a/arch/i386/kernel/vmitime.c Fri Feb 02 13:54:53 2007 -0800
+++ b/arch/i386/kernel/vmitime.c Fri Feb 02 15:31:35 2007 -0800
@@ -170,7 +170,7 @@ int vmi_set_wallclock(unsigned long now)
return -1;
}
-unsigned long long vmi_sched_clock(void)
+unsigned long long vmi_get_sched_cycles(void)
{
return read_available_cycles();
}
diff -r 3e746c0ebcdf include/asm-i386/vmi_time.h
--- a/include/asm-i386/vmi_time.h Fri Feb 02 13:54:53 2007 -0800
+++ b/include/asm-i386/vmi_time.h Fri Feb 02 15:31:53 2007 -0800
@@ -49,7 +49,7 @@ extern void __init vmi_time_init(void);
extern void __init vmi_time_init(void);
extern unsigned long vmi_get_wallclock(void);
extern int vmi_set_wallclock(unsigned long now);
-extern unsigned long long vmi_sched_clock(void);
+extern unsigned long long vmi_get_sched_cycles(void);
#ifdef CONFIG_X86_LOCAL_APIC
extern void __init vmi_timer_setup_boot_alarm(void);
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/11] Sched clock paravirt op
2007-02-06 3:52 [PATCH 2/11] Sched clock paravirt op Zachary Amsden
@ 2007-02-06 4:00 ` Zachary Amsden
2007-02-06 12:32 ` Andi Kleen
1 sibling, 0 replies; 8+ messages in thread
From: Zachary Amsden @ 2007-02-06 4:00 UTC (permalink / raw)
To: Zachary Amsden
Cc: Linux Kernel Mailing List, Andrew Morton, Andi Kleen,
Rusty Russell, Jeremy Fitzhardinge, Chris Wright
Zachary Amsden wrote:
>
> #include "mach_timer.h"
>
> @@ -102,9 +103,6 @@ unsigned long long sched_clock(void)
> {
> unsigned long long this_offset;
>
> - if (unlikely(custom_sched_clock))
> - return (*custom_sched_clock)();
> -
> /*
> * Fall back to jiffies if there's no TSC available:
> */
> @@ -113,13 +111,13 @@ unsigned long long sched_clock(void)
> return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
>
> /* read the Time Stamp Counter: */
> - rdtscll(this_offset);
> + get_scheduled_cycles(this_offset);
>
> /* return the value in ns */
> return cycles_2_ns(this_offset);
> }
>
I missed a title / signed-off on this guy.
Internally, sched_clock runs in units of nanoseconds, not CPU cycles.
This was wrong in my previous patch. Fix it so everyone can use the
same cycles_2_ns code in tsc.c.
Signed-off-by: Zachary Amsden <zach@vmware.com>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/11] Sched clock paravirt op
2007-02-06 3:52 [PATCH 2/11] Sched clock paravirt op Zachary Amsden
2007-02-06 4:00 ` Zachary Amsden
@ 2007-02-06 12:32 ` Andi Kleen
2007-02-06 22:47 ` Zachary Amsden
1 sibling, 1 reply; 8+ messages in thread
From: Andi Kleen @ 2007-02-06 12:32 UTC (permalink / raw)
To: Zachary Amsden
Cc: Linux Kernel Mailing List, Andrew Morton, Rusty Russell,
Jeremy Fitzhardinge, Chris Wright
> .write_msr = native_write_msr,
> .read_tsc = native_read_tsc,
> .read_pmc = native_read_pmc,
> + .get_scheduled_cycles = native_read_tsc,
> + .get_cpu_khz = native_calculate_cpu_khz,
> .load_tr_desc = native_load_tr_desc,
Description missing?
Please write at least two paragraphs or more on each new hook
you want to add.
My feeling is that rdtsc should work fine here. If not please explain.
-Andi
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/11] Sched clock paravirt op
2007-02-06 12:32 ` Andi Kleen
@ 2007-02-06 22:47 ` Zachary Amsden
2007-02-06 23:23 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 8+ messages in thread
From: Zachary Amsden @ 2007-02-06 22:47 UTC (permalink / raw)
To: Andi Kleen
Cc: Linux Kernel Mailing List, Andrew Morton, Rusty Russell,
Jeremy Fitzhardinge, Chris Wright
Andi Kleen wrote:
>> .write_msr = native_write_msr,
>> .read_tsc = native_read_tsc,
>> .read_pmc = native_read_pmc,
>> + .get_scheduled_cycles = native_read_tsc,
>> + .get_cpu_khz = native_calculate_cpu_khz,
>> .load_tr_desc = native_load_tr_desc,
>>
> Description missing?
>
I missed a title / signed-off on this guy.
Internally, sched_clock runs in units of nanoseconds, not CPU cycles.
This was wrong in my previous patch. Fix it so everyone can use the
same cycles_2_ns code in tsc.c.
Signed-off-by: Zachary Amsden <zach@vmware.com>
> Please write at least two paragraphs or more on each new hook
> you want to add.
>
Not a new hook; I just changed the name.
> My feeling is that rdtsc should work fine here. If not please explain.
>
It depends. Scheduled clock must be in units of available time - stolen
time is not always evenly distributed. If you make rdtsc just be
scheduled clock, that almost works. But most places that use rdtsc
expect it to be in cycles of approximate real time, and to leap forward
if something like SMM comes along and steals time.
Not that this is pretty. Arguably, the TSC should just run at a fixed
rate, not progress during stolen time. This idealized TSC assumption is
not, however, how Linux is making use of the TSC today. TSC is more like
real time, only in a VM, it can't quite keep up with real time, so it
gets simulated.
Scheduled (or available) time and real time are good notions. Stolen
time is debatable. But TSC is basically just always wrong. That's why
I don't want to overload the rdtsc operation.
Zach
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/11] Sched clock paravirt op
2007-02-06 22:47 ` Zachary Amsden
@ 2007-02-06 23:23 ` Jeremy Fitzhardinge
2007-02-06 23:42 ` Zachary Amsden
0 siblings, 1 reply; 8+ messages in thread
From: Jeremy Fitzhardinge @ 2007-02-06 23:23 UTC (permalink / raw)
To: Zachary Amsden
Cc: Andi Kleen, Linux Kernel Mailing List, Andrew Morton,
Rusty Russell, Chris Wright
Zachary Amsden wrote:
> Scheduled (or available) time and real time are good notions. Stolen
> time is debatable. But TSC is basically just always wrong. That's
> why I don't want to overload the rdtsc operation.
Well, in the Xen case it is actually guaranteed to be correct and useful
as real time, but that's definitely not something we can expect in
general. But you're talking specifically about schedulable vcpu time
here, right?
J
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/11] Sched clock paravirt op
2007-02-06 23:23 ` Jeremy Fitzhardinge
@ 2007-02-06 23:42 ` Zachary Amsden
2007-02-06 23:48 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 8+ messages in thread
From: Zachary Amsden @ 2007-02-06 23:42 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Andi Kleen, Linux Kernel Mailing List, Andrew Morton,
Rusty Russell, Chris Wright
Jeremy Fitzhardinge wrote:
> Zachary Amsden wrote:
>
>> Scheduled (or available) time and real time are good notions. Stolen
>> time is debatable. But TSC is basically just always wrong. That's
>> why I don't want to overload the rdtsc operation.
>>
> Well, in the Xen case it is actually guaranteed to be correct and useful
> as real time, but that's definitely not something we can expect in
> general. But you're talking specifically about schedulable vcpu time
> here, right?
>
Not schedulable time, scheduled time (schedulable - scheduled) = stolen
Zach
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/11] Sched clock paravirt op
2007-02-06 23:42 ` Zachary Amsden
@ 2007-02-06 23:48 ` Jeremy Fitzhardinge
2007-02-06 23:50 ` Zachary Amsden
0 siblings, 1 reply; 8+ messages in thread
From: Jeremy Fitzhardinge @ 2007-02-06 23:48 UTC (permalink / raw)
To: Zachary Amsden
Cc: Andi Kleen, Linux Kernel Mailing List, Andrew Morton,
Rusty Russell, Chris Wright
Zachary Amsden wrote:
> Jeremy Fitzhardinge wrote:
>> Zachary Amsden wrote:
>>
>>> Scheduled (or available) time and real time are good notions. Stolen
>>> time is debatable. But TSC is basically just always wrong. That's
>>> why I don't want to overload the rdtsc operation.
>> Well, in the Xen case it is actually guaranteed to be correct and useful
>> as real time, but that's definitely not something we can expect in
>> general. But you're talking specifically about schedulable vcpu time
>> here, right?
>>
>
> Not schedulable time, scheduled time (schedulable - scheduled) = stolen
I meant "schedulable" (perhaps "usable" would be better) from the
guest's perspective: total amount of real cpu time each vcpu gets. ie:
(real - schedulable) = stolen. So I think we're talking about the same
thing.
J
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/11] Sched clock paravirt op
2007-02-06 23:48 ` Jeremy Fitzhardinge
@ 2007-02-06 23:50 ` Zachary Amsden
0 siblings, 0 replies; 8+ messages in thread
From: Zachary Amsden @ 2007-02-06 23:50 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Andi Kleen, Linux Kernel Mailing List, Andrew Morton,
Rusty Russell, Chris Wright
Jeremy Fitzhardinge wrote:
> Zachary Amsden wrote:
>
>> Jeremy Fitzhardinge wrote:
>>
>>> Zachary Amsden wrote:
>>>
>>>
>>>> Scheduled (or available) time and real time are good notions. Stolen
>>>> time is debatable. But TSC is basically just always wrong. That's
>>>> why I don't want to overload the rdtsc operation.
>>>>
>>> Well, in the Xen case it is actually guaranteed to be correct and useful
>>> as real time, but that's definitely not something we can expect in
>>> general. But you're talking specifically about schedulable vcpu time
>>> here, right?
>>>
>>>
>> Not schedulable time, scheduled time (schedulable - scheduled) = stolen
>>
>
> I meant "schedulable" (perhaps "usable" would be better) from the
> guest's perspective: total amount of real cpu time each vcpu gets. ie:
> (real - schedulable) = stolen. So I think we're talking about the same
> thing.
>
Yes, I think so. The point, though, is that for Xen, TSC is real time.
So it won't do for the scheduler, which must be schedulable time.
Zach
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2007-02-06 23:51 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-06 3:52 [PATCH 2/11] Sched clock paravirt op Zachary Amsden
2007-02-06 4:00 ` Zachary Amsden
2007-02-06 12:32 ` Andi Kleen
2007-02-06 22:47 ` Zachary Amsden
2007-02-06 23:23 ` Jeremy Fitzhardinge
2007-02-06 23:42 ` Zachary Amsden
2007-02-06 23:48 ` Jeremy Fitzhardinge
2007-02-06 23:50 ` Zachary Amsden
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).