* [PATCH 1/3] i386: extend alternative instruction framework
2007-02-19 19:01 [PATCH 0/3] extend alternative instruction framework to support more than one alternative Joerg Roedel
@ 2007-02-19 19:04 ` Joerg Roedel
2007-02-19 19:07 ` [PATCH 1/3] x86_64: additions to the i386 alternative extensions to support x86_64 architecture Joerg Roedel
2007-02-19 19:11 ` [PATCH 3/3] optimize get_cycles_sync for Linux as KVM guest Joerg Roedel
2 siblings, 0 replies; 7+ messages in thread
From: Joerg Roedel @ 2007-02-19 19:04 UTC (permalink / raw)
To: discuss; +Cc: linux-kernel, Andi Kleen
[-- Attachment #1: Type: text/plain, Size: 300 bytes --]
From: Joerg Roedel <joerg.roedel@amd.com>
This patch extends the alternative instruction framework to support an
arbitrary number of alternatives on the i386 architecture.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
--
Joerg Roedel
Operating System Research Center
AMD Saxony LLC & Co. KG
[-- Attachment #2: alternative-extension-i386.patch --]
[-- Type: text/plain, Size: 7472 bytes --]
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 9eca21b..2dedf4b 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -149,19 +149,38 @@ extern u8 __smp_alt_begin[], __smp_alt_end[];
self modifying code. This implies that assymetric systems where
APs have less capabilities than the boot processor are not handled.
Tough. Make sure you disable such features by hand. */
-
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
struct alt_instr *a;
- u8 *instr;
+ u8 *instr = NULL, *new_instr = NULL;
+ u8 instr_len = 0, new_instr_len = 0;
int diff;
DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
for (a = start; a < end; a++) {
- BUG_ON(a->replacementlen > a->instrlen);
- if (!boot_cpu_has(a->cpuid))
+
+ if (a->used == 0) {
+ instr = a->instr;
+ instr_len = a->instrlen;
+ new_instr =
+ boot_cpu_has(a->cpuid) ? a->replacement : NULL;
+ new_instr_len =
+ boot_cpu_has(a->cpuid) ? a->replacementlen : 0;
+ } else if (a->used >= 1 && boot_cpu_has(a->instr_cpuid)) {
+ BUG_ON(instr == NULL);
+ new_instr = a->instr;
+ new_instr_len = a->instrlen;
+ } else if (a->used == 2 && boot_cpu_has(a->cpuid)) {
+ BUG_ON(instr == NULL);
+ new_instr = a->replacement;
+ new_instr_len = a->replacementlen;
+ }
+
+ if (instr == NULL || new_instr == NULL)
continue;
- instr = a->instr;
+
+ BUG_ON(new_instr_len > instr_len);
+
#ifdef CONFIG_X86_64
/* vsyscall code is not mapped yet. resolve it manually. */
if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
@@ -170,9 +189,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
__FUNCTION__, a->instr, instr);
}
#endif
- memcpy(instr, a->replacement, a->replacementlen);
- diff = a->instrlen - a->replacementlen;
- nop_out(instr + a->replacementlen, diff);
+ memcpy(instr, new_instr, new_instr_len);
+ diff = instr_len - new_instr_len;
+ nop_out(instr + new_instr_len, diff);
+ instr = new_instr = NULL;
}
}
diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h
index b8fa955..b899e61 100644
--- a/include/asm-i386/alternative.h
+++ b/include/asm-i386/alternative.h
@@ -7,14 +7,31 @@
#include <linux/stddef.h>
#include <linux/types.h>
+/* struct alt_instr - define replacement sequences
+ *
+ * this struct is used in 2 ways:
+ * - as the first entry for a replace sequence (used == 0)
+ * In this case *instr points to the original instruction and
+ * instr_cpuid is ignored
+ * - as a following entry in a replace sequence (used == [1|2])
+ * In this case *instr is used as a replacement pointer too
+ * (supporting up to two replacements per struct) and
+ * instr_cpuid is its cpuid value
+ *
+ * The first matching replacement in a sequence is used
+ */
struct alt_instr {
u8 *instr; /* original instruction */
u8 *replacement;
+ u8 used; /* count the number of replacements in
+ this struct (only for succeeding entries) */
+ u8 instr_cpuid; /* cpuid bit set if instr is used
+ as replacement */
u8 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction, <= instrlen */
- u8 pad;
-};
+ u8 pad[3];
+} __attribute__ ((packed));
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@@ -36,6 +53,12 @@ static inline void alternatives_smp_switch(int smp) {}
#endif
/*
+ * Use or extend the following macros if you need more than one
+ * output argument in the alternative_io() macro
+ */
+#define ALTERNATIVE_OUTPUT2(a,b) a,b
+
+/*
* Alternative instructions for different CPU types or capabilities.
*
* This allows to use optimized instructions even on generic binary
@@ -53,6 +76,8 @@ static inline void alternatives_smp_switch(int smp) {}
" .align 4\n" \
" .long 661b\n" /* label */ \
" .long 663f\n" /* new instruction */ \
+ " .byte 0x00\n" /* used count */ \
+ " .byte 0x00\n" /* instr_cpuid */ \
" .byte %c0\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
@@ -77,6 +102,8 @@ static inline void alternatives_smp_switch(int smp) {}
" .align 4\n" \
" .long 661b\n" /* label */ \
" .long 663f\n" /* new instruction */ \
+ " .byte 0x00\n" /* used count */ \
+ " .byte 0x00\n" /* instr_cpuid */ \
" .byte %c0\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
@@ -85,6 +112,62 @@ static inline void alternatives_smp_switch(int smp) {}
"663:\n\t" newinstr "\n664:\n" /* replacement */\
".previous" :: "i" (feature), ##input)
+/* Like alternative_input, but with a single output argument */
+#define alternative_io(oldinstr, newinstr, feature, output, input...) \
+ asm volatile ("661:\n\t" oldinstr "\n662:\n" \
+ ".section .altinstructions,\"a\"\n" \
+ " .align 4\n" \
+ " .long 661b\n" /* label */ \
+ " .long 663f\n" /* new instruction */ \
+ " .byte 0x00\n" /* first entry */ \
+ " .byte 0x00\n" /* zero for first entry */ \
+ " .byte %c[feat]\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* sourcelen */ \
+ " .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+ ".previous" : output : [feat] "i" (feature), ##input)
+
+/*
+ * additional alternatives
+ *
+ * In the case where more than one alternative for an instruction exist,
+ * the two following macros could be used. They must appear immediately
+ * after the use alternative_io, alternative_input or alternative macros.
+ */
+
+#define alternative_add_one(newinstr2, feature2) \
+ asm volatile(".section .altinstructions,\"a\"\n" \
+ " .align 4\n" \
+ " .long 661f\n" \
+ " .long 0x00\n" \
+ " .byte 0x01\n" \
+ " .byte %c[feat2]\n" \
+ " .byte 0x00\n" \
+ " .byte 662f-661f\n" \
+ " .byte 0x00\n" \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "661:\n\t" newinstr2 "\n662:\n" \
+ ".previous" : : [feat2] "i" (feature2) )
+
+#define alternative_add_two(newinstr2, feature2, newinstr3, feature3) \
+ asm volatile(".section .altinstructions,\"a\"\n" \
+ " .align 4\n" \
+ " .long 661f\n" \
+ " .long 663f\n" \
+ " .byte 0x02\n" \
+ " .byte %c[feat2]\n" \
+ " .byte %c[feat3]\n" \
+ " .byte 662f-661f\n" \
+ " .byte 664f-663f\n" \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "661:\n\t" newinstr2 "\n662:\n" \
+ "663:\n\t" newinstr3 "\n664:\n" \
+ ".previous" : : [feat2] "i" (feature2), [feat3] "i" (feature3))
+
/*
* Alternative inline assembly for SMP.
*
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/3] x86_64: additions to the i386 alternative extensions to support x86_64 architecture
2007-02-19 19:01 [PATCH 0/3] extend alternative instruction framework to support more than one alternative Joerg Roedel
2007-02-19 19:04 ` [PATCH 1/3] i386: extend alternative instruction framework Joerg Roedel
@ 2007-02-19 19:07 ` Joerg Roedel
2007-02-19 19:13 ` [discuss] " Joerg Roedel
2007-02-19 21:29 ` Andi Kleen
2007-02-19 19:11 ` [PATCH 3/3] optimize get_cycles_sync for Linux as KVM guest Joerg Roedel
2 siblings, 2 replies; 7+ messages in thread
From: Joerg Roedel @ 2007-02-19 19:07 UTC (permalink / raw)
To: discuss; +Cc: linux-kernel, Andi Kleen
[-- Attachment #1: Type: text/plain, Size: 299 bytes --]
From: Joerg Roedel <joerg.roedel@amd.com>
This patch adds the changes necessary to bring the i386 alternative
instruction framework extension to the x86_64 architecture.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
--
Joerg Roedel
Operating System Research Center
AMD Saxony LLC & Co. KG
[-- Attachment #2: alternative-extension-x86_84.patch --]
[-- Type: text/plain, Size: 6491 bytes --]
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
index 9a10a78..4e34c12 100644
--- a/arch/x86_64/lib/clear_page.S
+++ b/arch/x86_64/lib/clear_page.S
@@ -53,6 +53,8 @@ ENDPROC(clear_page)
.align 8
.quad clear_page
.quad 1b
+ .byte 0x00
+ .byte 0x00
.byte X86_FEATURE_REP_GOOD
.byte .Lclear_page_end - clear_page
.byte 2b - 1b
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
index 727a5d4..2aa51c2 100644
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -113,6 +113,8 @@ ENDPROC(copy_page)
.align 8
.quad copy_page
.quad 1b
+ .byte 0x00
+ .byte 0x00
.byte X86_FEATURE_REP_GOOD
.byte .Lcopy_page_end - copy_page
.byte 2b - 1b
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
index 70bebd3..2dd39d1 100644
--- a/arch/x86_64/lib/copy_user.S
+++ b/arch/x86_64/lib/copy_user.S
@@ -27,6 +27,8 @@
.align 8
.quad 0b
.quad 2b
+ .byte 0x00
+ .byte 0x00
.byte \feature /* when feature is set */
.byte 5
.byte 5
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index 0ea0ddc..ee42a08 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -123,6 +123,8 @@ ENDPROC(__memcpy)
.align 8
.quad memcpy
.quad 1b
+ .byte 0x00
+ .byte 0x00
.byte X86_FEATURE_REP_GOOD
.byte .Lfinal - memcpy
.byte 2b - 1b
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index 2c59481..814e091 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -127,6 +127,8 @@ ENDPROC(__memset)
.align 8
.quad memset
.quad 1b
+ .byte 0x00
+ .byte 0x00
.byte X86_FEATURE_REP_GOOD
.byte .Lfinal - memset
.byte 2b - 1b
--- a/include/asm-x86_64/alternative.h
+++ b/include/asm-x86_64/alternative.h
@@ -7,14 +7,31 @@
#include <linux/stddef.h>
#include <asm/cpufeature.h>
+/* struct alt_instr - define replacement sequences
+ *
+ * this struct is used in 2 ways:
+ * - as the first entry for a replace sequence (used == 0)
+ * In this case *instr points to the original instruction and
+ * instr_cpuid is ignored
+ * - as a following entry in a replace sequence (used == [1|2])
+ * In this case *instr is used as a replacement pointer too
+ * (supporting up to two replacements per struct) and
+ * instr_cpuid is its cpuid value
+ *
+ * The first matching replacement in a sequence is used
+ */
struct alt_instr {
u8 *instr; /* original instruction */
u8 *replacement;
+ u8 used; /* count the number of replacements in
+ this struct (only for succeeding entries) */
+ u8 instr_cpuid; /* cpuid bit set if instr is used
+ as replacement */
u8 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction, <= instrlen */
- u8 pad[5];
-};
+ u8 pad[3];
+} __attribute__ ((packed));
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@@ -36,6 +53,13 @@ static inline void alternatives_smp_switch(int smp) {}
#endif
+
+/*
+ * Use or extend the following macros if you need more than one
+ * output argument in the alternative_io() macro
+ */
+#define ALTERNATIVE_OUTPUT2(a,b) a,b
+
/*
* Alternative instructions for different CPU types or capabilities.
*
@@ -54,6 +78,8 @@ static inline void alternatives_smp_switch(int smp) {}
" .align 8\n" \
" .quad 661b\n" /* label */ \
" .quad 663f\n" /* new instruction */ \
+ " .byte 0x00\n" /* first entry */ \
+ " .byte 0x00\n" /* zero for first entry */ \
" .byte %c0\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
@@ -78,6 +104,8 @@ static inline void alternatives_smp_switch(int smp) {}
" .align 8\n" \
" .quad 661b\n" /* label */ \
" .quad 663f\n" /* new instruction */ \
+ " .byte 0x00\n" /* first entry */ \
+ " .byte 0x00\n" /* zero for first entry */ \
" .byte %c0\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
@@ -93,6 +121,8 @@ static inline void alternatives_smp_switch(int smp) {}
" .align 8\n" \
" .quad 661b\n" /* label */ \
" .quad 663f\n" /* new instruction */ \
+ " .byte 0x00\n" /* first entry */ \
+ " .byte 0x00\n" /* zero for first entry */ \
" .byte %c[feat]\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
@@ -102,6 +132,45 @@ static inline void alternatives_smp_switch(int smp) {}
".previous" : output : [feat] "i" (feature), ##input)
/*
+ * additional alternatives
+ *
+ * In the case where more than one alternative for an instruction exist,
+ * the two following macros could be used. They must appear immediately
+ * after the use alternative_io, alternative_input or alternative macros.
+ */
+
+#define alternative_add_one(newinstr2, feature2) \
+ asm volatile(".section .altinstructions,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 661f\n" \
+ " .quad 0x00\n" \
+ " .byte 0x01\n" \
+ " .byte %c[feat2]\n" \
+ " .byte 0x00\n" \
+ " .byte 662f-661f\n" \
+ " .byte 0x00\n" \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "661:\n\t" newinstr2 "\n662:\n" \
+ ".previous" : : [feat2] "i" (feature2) )
+
+#define alternative_add_two(newinstr2, feature2, newinstr3, feature3) \
+ asm volatile(".section .altinstructions,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 661f\n" \
+ " .quad 663f\n" \
+ " .byte 0x02\n" \
+ " .byte %c[feat2]\n" \
+ " .byte %c[feat3]\n" \
+ " .byte 662f-661f\n" \
+ " .byte 664f-663f\n" \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "661:\n\t" newinstr2 "\n662:\n" \
+ "663:\n\t" newinstr3 "\n664:\n" \
+ ".previous" : : [feat2] "i" (feature2), [feat3] "i" (feature3) )
+
+/*
* Alternative inline assembly for SMP.
*
* The LOCK_PREFIX macro defined here replaces the LOCK and
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [discuss] [PATCH 1/3] x86_64: additions to the i386 alternative extensions to support x86_64 architecture
2007-02-19 19:07 ` [PATCH 1/3] x86_64: additions to the i386 alternative extensions to support x86_64 architecture Joerg Roedel
@ 2007-02-19 19:13 ` Joerg Roedel
2007-02-19 21:29 ` Andi Kleen
1 sibling, 0 replies; 7+ messages in thread
From: Joerg Roedel @ 2007-02-19 19:13 UTC (permalink / raw)
To: discuss; +Cc: linux-kernel, Andi Kleen
The subject should have read [PATCH 2/3], of course.
--
Joerg Roedel
Operating System Research Center
AMD Saxony LLC & Co. KG
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [discuss] [PATCH 1/3] x86_64: additions to the i386 alternative extensions to support x86_64 architecture
2007-02-19 19:07 ` [PATCH 1/3] x86_64: additions to the i386 alternative extensions to support x86_64 architecture Joerg Roedel
2007-02-19 19:13 ` [discuss] " Joerg Roedel
@ 2007-02-19 21:29 ` Andi Kleen
2007-02-20 20:10 ` Joerg Roedel
1 sibling, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2007-02-19 21:29 UTC (permalink / raw)
To: discuss; +Cc: Joerg Roedel, linux-kernel
On Monday 19 February 2007 20:07, Joerg Roedel wrote:
> From: Joerg Roedel <joerg.roedel@amd.com>
>
> This patch adds the necessary changes to extend the i386 alternative
> instruction framework extension on the x86_64 architecture.
Looks complicated and somewhat fragile.
I think I would prefer it if you changed the pad field to a "bit not set"
cpuid field. Then at least 2 alternatives + a nothing alternative could
be described too.
-Andi
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [discuss] [PATCH 1/3] x86_64: additions to the i386 alternative extensions to support x86_64 architecture
2007-02-19 21:29 ` Andi Kleen
@ 2007-02-20 20:10 ` Joerg Roedel
0 siblings, 0 replies; 7+ messages in thread
From: Joerg Roedel @ 2007-02-20 20:10 UTC (permalink / raw)
To: Andi Kleen; +Cc: discuss, linux-kernel
On Mon, Feb 19, 2007 at 10:29:22PM +0100, Andi Kleen wrote:
> On Monday 19 February 2007 20:07, Joerg Roedel wrote:
> > From: Joerg Roedel <joerg.roedel@amd.com>
> >
> > This patch adds the necessary changes to extend the i386 alternative
> > instruction framework extension on the x86_64 architecture.
>
> Looks complicated and somewhat fragile.
The tests on i386 and x86_64 just work fine :-)
> I think I would prefer it if you changed the pad field to a "bit not set"
> cpuid field. Then at least 2 alternatives + a nothing alternative could
> be described too.
I'm not sure I fully understand what you mean. Do you mean to add a
further alternative instruction to the struct for supporting 2 alternative
instructions?
Joerg
--
Joerg Roedel
Operating System Research Center
AMD Saxony LLC & Co. KG
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 3/3] optimize get_cycles_sync for Linux as KVM guest
2007-02-19 19:01 [PATCH 0/3] extend alternative instruction framework to support more than one alternative Joerg Roedel
2007-02-19 19:04 ` [PATCH 1/3] i386: extend alternative instruction framework Joerg Roedel
2007-02-19 19:07 ` [PATCH 1/3] x86_64: additions to the i386 alternative extensions to support x86_64 architecture Joerg Roedel
@ 2007-02-19 19:11 ` Joerg Roedel
2 siblings, 0 replies; 7+ messages in thread
From: Joerg Roedel @ 2007-02-19 19:11 UTC (permalink / raw)
To: discuss; +Cc: linux-kernel, Andi Kleen
[-- Attachment #1: Type: text/plain, Size: 460 bytes --]
From: Joerg Roedel <joerg.roedel@amd.com>
This patch modifies the get_cycles_sync() function on i386 and x86_64 to
use the RDTSCP instruction (if it is available) instead of CPUID to
synchronize with the CPU core. This is especially useful when running Linux
as a KVM guest, because CPUID is intercepted there and causes a VMEXIT.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
--
Joerg Roedel
Operating System Research Center
AMD Saxony LLC & Co. KG
[-- Attachment #2: get_cycles_sync-optimization.patch --]
[-- Type: text/plain, Size: 1868 bytes --]
diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h
index 3f92b94..7275e41 100644
--- a/include/asm-i386/cpufeature.h
+++ b/include/asm-i386/cpufeature.h
@@ -49,6 +49,7 @@
#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
#define X86_FEATURE_NX (1*32+20) /* Execute Disable */
#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
diff --git a/include/asm-x86_64/tsc.h b/include/asm-x86_64/tsc.h
index 9a0a368..7db952d 100644
--- a/include/asm-x86_64/tsc.h
+++ b/include/asm-x86_64/tsc.h
@@ -34,22 +34,28 @@ static inline cycles_t get_cycles(void)
/* Like get_cycles, but make sure the CPU is synchronized. */
static __always_inline cycles_t get_cycles_sync(void)
{
- unsigned long long ret;
-#ifdef X86_FEATURE_SYNC_RDTSC
+ unsigned int a, d;
unsigned eax;
+#ifdef X86_FEATURE_SYNC_RDTSC
/*
* Don't do an additional sync on CPUs where we know
* RDTSC is already synchronous:
*/
alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
"=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
+ /* We use RDTSCP if it is available, no extra CPUID required then */
+ alternative_add_one(ASM_NOP2, X86_FEATURE_RDTSCP);
#else
- sync_core();
+ /* no CPUID required if we use RDTSCP */
+ alternative_io("cpuid", ASM_NOP2, X86_FEATURE_RDTSCP,
+ "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
#endif
- rdtscll(ret);
- return ret;
+ alternative_io("rdtsc\n" ASM_NOP1, "rdtscp", X86_FEATURE_RDTSCP,
+ ALTERNATIVE_OUTPUT2("=a" (a), "=d" (d)), "0" (1) : "ecx","memory");
+
+ return ((unsigned long long)a) | ((unsigned long long)d) << 32;
}
extern void tsc_init(void);
^ permalink raw reply [flat|nested] 7+ messages in thread