LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH v2 1/2] perf annotate: Fix fused instr logic for assembly functions
@ 2021-09-11  4:38 Ravi Bangoria
  2021-09-11  4:38 ` [PATCH v2 2/2] perf annotate: Add fusion logic for AMD microarchs Ravi Bangoria
  2021-09-11 19:03 ` [PATCH v2 1/2] perf annotate: Fix fused instr logic for assembly functions Arnaldo Carvalho de Melo
  0 siblings, 2 replies; 5+ messages in thread
From: Ravi Bangoria @ 2021-09-11  4:38 UTC (permalink / raw)
  To: acme
  Cc: ravi.bangoria, mark.rutland, alexander.shishkin, jolsa, yao.jin,
	namhyung, kim.phillips, linux-perf-users, linux-kernel

Some x86 microarchitectures fuse a subset of cmp/test/ALU instructions
with branch instructions, and thus perf annotate highlight such valid
pairs as fused.

When annotated with source, perf uses struct disasm_line to contain
either source or instruction line from objdump output. Usually, a C
statement generates multiple instructions which include such
cmp/test/ALU + branch instruction pairs. But in case of assembly
function, each individual assembly source line generate one
instruction. Perf annotate instruction fusion logic assumes previous
disasm_line as previous instruction line, which is wrong because,
for assembly function, previous disasm_line contains source line.
And thus perf fails to highlight valid fused instruction pairs for
assembly functions.

Fix it by searching backward until we find an instruction line and
consider that disasm_line as fused with current branch instruction.

Before:
         │    cmpq    %rcx, RIP+8(%rsp)
    0.00 │      cmp    %rcx,0x88(%rsp)
         │    je      .Lerror_bad_iret      <--- Source line
    0.14 │   ┌──je     b4                   <--- Instruction line
         │   │movl    %ecx, %eax

After:
         │    cmpq    %rcx, RIP+8(%rsp)
    0.00 │   ┌──cmp    %rcx,0x88(%rsp)
         │   │je      .Lerror_bad_iret
    0.14 │   ├──je     b4
         │   │movl    %ecx, %eax

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
---
 tools/perf/ui/browser.c           | 33 ++++++++++++++++++++++---------
 tools/perf/ui/browser.h           |  2 +-
 tools/perf/ui/browsers/annotate.c | 24 +++++++++++++++-------
 3 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 781afe42e90e..fa5bd5c20e96 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -757,25 +757,40 @@ void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
 }
 
 void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
-			    unsigned int row, bool arrow_down)
+			    unsigned int row, int diff, bool arrow_down)
 {
-	unsigned int end_row;
+	int end_row;
 
-	if (row >= browser->top_idx)
-		end_row = row - browser->top_idx;
-	else
+	if (diff <= 0)
 		return;
 
 	SLsmg_set_char_set(1);
 
 	if (arrow_down) {
+		if (row + diff <= browser->top_idx)
+			return;
+
+		end_row = row + diff - browser->top_idx;
 		ui_browser__gotorc(browser, end_row, column - 1);
-		SLsmg_write_char(SLSMG_ULCORN_CHAR);
-		ui_browser__gotorc(browser, end_row, column);
-		SLsmg_draw_hline(2);
-		ui_browser__gotorc(browser, end_row + 1, column - 1);
 		SLsmg_write_char(SLSMG_LTEE_CHAR);
+
+		while (--end_row >= 0 && end_row > (int)(row - browser->top_idx)) {
+			ui_browser__gotorc(browser, end_row, column - 1);
+			SLsmg_draw_vline(1);
+		}
+
+		end_row = (int)(row - browser->top_idx);
+		if (end_row >= 0) {
+			ui_browser__gotorc(browser, end_row, column - 1);
+			SLsmg_write_char(SLSMG_ULCORN_CHAR);
+			ui_browser__gotorc(browser, end_row, column);
+			SLsmg_draw_hline(2);
+		}
 	} else {
+		if (row < browser->top_idx)
+			return;
+
+		end_row = row - browser->top_idx;
 		ui_browser__gotorc(browser, end_row, column - 1);
 		SLsmg_write_char(SLSMG_LTEE_CHAR);
 		ui_browser__gotorc(browser, end_row, column);
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index 3678eb88f119..510ce4554050 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -51,7 +51,7 @@ void ui_browser__write_graph(struct ui_browser *browser, int graph);
 void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
 			      u64 start, u64 end);
 void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
-			    unsigned int row, bool arrow_down);
+			    unsigned int row, int diff, bool arrow_down);
 void __ui_browser__show_title(struct ui_browser *browser, const char *title);
 void ui_browser__show_title(struct ui_browser *browser, const char *title);
 int ui_browser__show(struct ui_browser *browser, const char *title,
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index ef4da4295bf7..e81c2493efdf 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -125,13 +125,20 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 		ab->selection = al;
 }
 
-static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
+static int is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
 {
 	struct disasm_line *pos = list_prev_entry(cursor, al.node);
 	const char *name;
+	int diff = 1;
+
+	while (pos && pos->al.offset == -1) {
+		pos = list_prev_entry(pos, al.node);
+		if (!ab->opts->hide_src_code)
+			diff++;
+	}
 
 	if (!pos)
-		return false;
+		return 0;
 
 	if (ins__is_lock(&pos->ins))
 		name = pos->ops.locked.ins.name;
@@ -139,9 +146,11 @@ static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
 		name = pos->ins.name;
 
 	if (!name || !cursor->ins.name)
-		return false;
+		return 0;
 
-	return ins__is_fused(ab->arch, name, cursor->ins.name);
+	if (ins__is_fused(ab->arch, name, cursor->ins.name))
+		return diff;
+	return 0;
 }
 
 static void annotate_browser__draw_current_jump(struct ui_browser *browser)
@@ -155,6 +164,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
 	struct annotation *notes = symbol__annotation(sym);
 	u8 pcnt_width = annotation__pcnt_width(notes);
 	int width;
+	int diff = 0;
 
 	/* PLT symbols contain external offsets */
 	if (strstr(sym->name, "@plt"))
@@ -205,11 +215,11 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
 				 pcnt_width + 2 + notes->widths.addr + width,
 				 from, to);
 
-	if (is_fused(ab, cursor)) {
+	diff = is_fused(ab, cursor);
+	if (diff > 0) {
 		ui_browser__mark_fused(browser,
 				       pcnt_width + 3 + notes->widths.addr + width,
-				       from - 1,
-				       to > from);
+				       from - diff, diff, to > from);
 	}
 }
 
-- 
2.27.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 2/2] perf annotate: Add fusion logic for AMD microarchs
  2021-09-11  4:38 [PATCH v2 1/2] perf annotate: Fix fused instr logic for assembly functions Ravi Bangoria
@ 2021-09-11  4:38 ` Ravi Bangoria
  2021-09-11 19:03 ` [PATCH v2 1/2] perf annotate: Fix fused instr logic for assembly functions Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 5+ messages in thread
From: Ravi Bangoria @ 2021-09-11  4:38 UTC (permalink / raw)
  To: acme
  Cc: ravi.bangoria, mark.rutland, alexander.shishkin, jolsa, yao.jin,
	namhyung, kim.phillips, linux-perf-users, linux-kernel

AMD family 15h and above microarchs fuse a subset of cmp/test/ALU
instructions with branch instructions[1][2]. Add perf annotate
fused instruction support for these microarchs.

Before:
         │       testb  $0x80,0x51(%rax)
         │    ┌──jne    5b3
    0.78 │    │  mov    %r13,%rdi
         │    │→ callq  mark_page_accessed
    1.08 │5b3:└─→mov    0x8(%r13),%rax

After:
         │    ┌──testb  $0x80,0x51(%rax)
         │    ├──jne    5b3
    0.78 │    │  mov    %r13,%rdi
         │    │→ callq  mark_page_accessed
    1.08 │5b3:└─→mov    0x8(%r13),%rax

[1] https://bugzilla.kernel.org/attachment.cgi?id=298553
[2] https://bugzilla.kernel.org/attachment.cgi?id=298555

Reported-by: Kim Phillips <kim.phillips@amd.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
---
v1->v2:
  - Don't store vendor id. Instead, directly assign function pointer
    based on verndor id. (acme)

 tools/perf/arch/x86/annotate/instructions.c | 28 ++++++++++++++++++++-
 tools/perf/util/annotate.c                  |  1 -
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index 24ea12ec7e02..305872692bfd 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -144,8 +144,31 @@ static struct ins x86__instructions[] = {
 	{ .name = "xorps",	.ops = &mov_ops, },
 };
 
-static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
+static bool amd__ins_is_fused(struct arch *arch, const char *ins1,
 			      const char *ins2)
+{
+	if (strstr(ins2, "jmp"))
+		return false;
+
+	/* Family >= 15h supports cmp/test + branch fusion */
+	if (arch->family >= 0x15 && (strstarts(ins1, "test") ||
+	    (strstarts(ins1, "cmp") && !strstr(ins1, "xchg")))) {
+		return true;
+	}
+
+	/* Family >= 19h supports some ALU + branch fusion */
+	if (arch->family >= 0x19 && (strstarts(ins1, "add") ||
+	    strstarts(ins1, "sub") || strstarts(ins1, "and") ||
+	    strstarts(ins1, "inc") || strstarts(ins1, "dec") ||
+	    strstarts(ins1, "or") || strstarts(ins1, "xor"))) {
+		return true;
+	}
+
+	return false;
+}
+
+static bool intel__ins_is_fused(struct arch *arch, const char *ins1,
+				const char *ins2)
 {
 	if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp"))
 		return false;
@@ -184,6 +207,9 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid)
 	if (ret == 3) {
 		arch->family = family;
 		arch->model = model;
+		arch->ins_is_fused = strstarts(cpuid, "AuthenticAMD") ?
+					amd__ins_is_fused :
+					intel__ins_is_fused;
 		return 0;
 	}
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0bae061b2d6d..b55f35485e43 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -183,7 +183,6 @@ static struct arch architectures[] = {
 		.init = x86__annotate_init,
 		.instructions = x86__instructions,
 		.nr_instructions = ARRAY_SIZE(x86__instructions),
-		.ins_is_fused = x86__ins_is_fused,
 		.objdump =  {
 			.comment_char = '#',
 		},
-- 
2.27.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/2] perf annotate: Fix fused instr logic for assembly functions
  2021-09-11  4:38 [PATCH v2 1/2] perf annotate: Fix fused instr logic for assembly functions Ravi Bangoria
  2021-09-11  4:38 ` [PATCH v2 2/2] perf annotate: Add fusion logic for AMD microarchs Ravi Bangoria
@ 2021-09-11 19:03 ` Arnaldo Carvalho de Melo
  2021-09-13  1:54   ` Jin, Yao
  1 sibling, 1 reply; 5+ messages in thread
From: Arnaldo Carvalho de Melo @ 2021-09-11 19:03 UTC (permalink / raw)
  To: Jin Yao, Ravi Bangoria
  Cc: mark.rutland, alexander.shishkin, jolsa, namhyung, kim.phillips,
	linux-perf-users, linux-kernel

Em Sat, Sep 11, 2021 at 10:08:53AM +0530, Ravi Bangoria escreveu:
> Some x86 microarchitectures fuse a subset of cmp/test/ALU instructions
> with branch instructions, and thus perf annotate highlight such valid
> pairs as fused.

Jin, are you ok with this? Can I have your reviewed-by?

- Arnaldo
 
> When annotated with source, perf uses struct disasm_line to contain
> either source or instruction line from objdump output. Usually, a C
> statement generates multiple instructions which include such
> cmp/test/ALU + branch instruction pairs. But in case of assembly
> function, each individual assembly source line generate one
> instruction. Perf annotate instruction fusion logic assumes previous
> disasm_line as previous instruction line, which is wrong because,
> for assembly function, previous disasm_line contains source line.
> And thus perf fails to highlight valid fused instruction pairs for
> assembly functions.
> 
> Fix it by searching backward until we find an instruction line and
> consider that disasm_line as fused with current branch instruction.
> 
> Before:
>          │    cmpq    %rcx, RIP+8(%rsp)
>     0.00 │      cmp    %rcx,0x88(%rsp)
>          │    je      .Lerror_bad_iret      <--- Source line
>     0.14 │   ┌──je     b4                   <--- Instruction line
>          │   │movl    %ecx, %eax
> 
> After:
>          │    cmpq    %rcx, RIP+8(%rsp)
>     0.00 │   ┌──cmp    %rcx,0x88(%rsp)
>          │   │je      .Lerror_bad_iret
>     0.14 │   ├──je     b4
>          │   │movl    %ecx, %eax
> 
> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
> ---
>  tools/perf/ui/browser.c           | 33 ++++++++++++++++++++++---------
>  tools/perf/ui/browser.h           |  2 +-
>  tools/perf/ui/browsers/annotate.c | 24 +++++++++++++++-------
>  3 files changed, 42 insertions(+), 17 deletions(-)
> 
> diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
> index 781afe42e90e..fa5bd5c20e96 100644
> --- a/tools/perf/ui/browser.c
> +++ b/tools/perf/ui/browser.c
> @@ -757,25 +757,40 @@ void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
>  }
>  
>  void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
> -			    unsigned int row, bool arrow_down)
> +			    unsigned int row, int diff, bool arrow_down)
>  {
> -	unsigned int end_row;
> +	int end_row;
>  
> -	if (row >= browser->top_idx)
> -		end_row = row - browser->top_idx;
> -	else
> +	if (diff <= 0)
>  		return;
>  
>  	SLsmg_set_char_set(1);
>  
>  	if (arrow_down) {
> +		if (row + diff <= browser->top_idx)
> +			return;
> +
> +		end_row = row + diff - browser->top_idx;
>  		ui_browser__gotorc(browser, end_row, column - 1);
> -		SLsmg_write_char(SLSMG_ULCORN_CHAR);
> -		ui_browser__gotorc(browser, end_row, column);
> -		SLsmg_draw_hline(2);
> -		ui_browser__gotorc(browser, end_row + 1, column - 1);
>  		SLsmg_write_char(SLSMG_LTEE_CHAR);
> +
> +		while (--end_row >= 0 && end_row > (int)(row - browser->top_idx)) {
> +			ui_browser__gotorc(browser, end_row, column - 1);
> +			SLsmg_draw_vline(1);
> +		}
> +
> +		end_row = (int)(row - browser->top_idx);
> +		if (end_row >= 0) {
> +			ui_browser__gotorc(browser, end_row, column - 1);
> +			SLsmg_write_char(SLSMG_ULCORN_CHAR);
> +			ui_browser__gotorc(browser, end_row, column);
> +			SLsmg_draw_hline(2);
> +		}
>  	} else {
> +		if (row < browser->top_idx)
> +			return;
> +
> +		end_row = row - browser->top_idx;
>  		ui_browser__gotorc(browser, end_row, column - 1);
>  		SLsmg_write_char(SLSMG_LTEE_CHAR);
>  		ui_browser__gotorc(browser, end_row, column);
> diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
> index 3678eb88f119..510ce4554050 100644
> --- a/tools/perf/ui/browser.h
> +++ b/tools/perf/ui/browser.h
> @@ -51,7 +51,7 @@ void ui_browser__write_graph(struct ui_browser *browser, int graph);
>  void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
>  			      u64 start, u64 end);
>  void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
> -			    unsigned int row, bool arrow_down);
> +			    unsigned int row, int diff, bool arrow_down);
>  void __ui_browser__show_title(struct ui_browser *browser, const char *title);
>  void ui_browser__show_title(struct ui_browser *browser, const char *title);
>  int ui_browser__show(struct ui_browser *browser, const char *title,
> diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
> index ef4da4295bf7..e81c2493efdf 100644
> --- a/tools/perf/ui/browsers/annotate.c
> +++ b/tools/perf/ui/browsers/annotate.c
> @@ -125,13 +125,20 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
>  		ab->selection = al;
>  }
>  
> -static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
> +static int is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
>  {
>  	struct disasm_line *pos = list_prev_entry(cursor, al.node);
>  	const char *name;
> +	int diff = 1;
> +
> +	while (pos && pos->al.offset == -1) {
> +		pos = list_prev_entry(pos, al.node);
> +		if (!ab->opts->hide_src_code)
> +			diff++;
> +	}
>  
>  	if (!pos)
> -		return false;
> +		return 0;
>  
>  	if (ins__is_lock(&pos->ins))
>  		name = pos->ops.locked.ins.name;
> @@ -139,9 +146,11 @@ static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
>  		name = pos->ins.name;
>  
>  	if (!name || !cursor->ins.name)
> -		return false;
> +		return 0;
>  
> -	return ins__is_fused(ab->arch, name, cursor->ins.name);
> +	if (ins__is_fused(ab->arch, name, cursor->ins.name))
> +		return diff;
> +	return 0;
>  }
>  
>  static void annotate_browser__draw_current_jump(struct ui_browser *browser)
> @@ -155,6 +164,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
>  	struct annotation *notes = symbol__annotation(sym);
>  	u8 pcnt_width = annotation__pcnt_width(notes);
>  	int width;
> +	int diff = 0;
>  
>  	/* PLT symbols contain external offsets */
>  	if (strstr(sym->name, "@plt"))
> @@ -205,11 +215,11 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
>  				 pcnt_width + 2 + notes->widths.addr + width,
>  				 from, to);
>  
> -	if (is_fused(ab, cursor)) {
> +	diff = is_fused(ab, cursor);
> +	if (diff > 0) {
>  		ui_browser__mark_fused(browser,
>  				       pcnt_width + 3 + notes->widths.addr + width,
> -				       from - 1,
> -				       to > from);
> +				       from - diff, diff, to > from);
>  	}
>  }
>  
> -- 
> 2.27.0

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/2] perf annotate: Fix fused instr logic for assembly functions
  2021-09-11 19:03 ` [PATCH v2 1/2] perf annotate: Fix fused instr logic for assembly functions Arnaldo Carvalho de Melo
@ 2021-09-13  1:54   ` Jin, Yao
  2021-09-13 19:53     ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 5+ messages in thread
From: Jin, Yao @ 2021-09-13  1:54 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ravi Bangoria
  Cc: mark.rutland, alexander.shishkin, jolsa, namhyung, kim.phillips,
	linux-perf-users, linux-kernel

Hi Arnaldo, Ravi

On 9/12/2021 3:03 AM, Arnaldo Carvalho de Melo wrote:
> Em Sat, Sep 11, 2021 at 10:08:53AM +0530, Ravi Bangoria escreveu:
>> Some x86 microarchitectures fuse a subset of cmp/test/ALU instructions
>> with branch instructions, and thus perf annotate highlight such valid
>> pairs as fused.
> 
> Jin, are you ok with this? Can I have your reviewed-by?
> 
> - Arnaldo
>   

Oh, my original patch could only handle the case like:

cmp xxx
je  aaa

But it didn't consider Ravi's case something like:

cmp xxx
cmp yyy
je  aaa
je  bbb

Thanks for Ravi fixing this issue! Backward searching is probably a better solution.

Frankly I can't reproduce Ravi's case, but for my test suite, Ravi's patch works as well.

Reviewed-by: Jin Yao <yao.jin@linux.intel.com>

Thanks
Jin Yao

>> When annotated with source, perf uses struct disasm_line to contain
>> either source or instruction line from objdump output. Usually, a C
>> statement generates multiple instructions which include such
>> cmp/test/ALU + branch instruction pairs. But in case of assembly
>> function, each individual assembly source line generate one
>> instruction. Perf annotate instruction fusion logic assumes previous
>> disasm_line as previous instruction line, which is wrong because,
>> for assembly function, previous disasm_line contains source line.
>> And thus perf fails to highlight valid fused instruction pairs for
>> assembly functions.
>>
>> Fix it by searching backward until we find an instruction line and
>> consider that disasm_line as fused with current branch instruction.
>>
>> Before:
>>           │    cmpq    %rcx, RIP+8(%rsp)
>>      0.00 │      cmp    %rcx,0x88(%rsp)
>>           │    je      .Lerror_bad_iret      <--- Source line
>>      0.14 │   ┌──je     b4                   <--- Instruction line
>>           │   │movl    %ecx, %eax
>>
>> After:
>>           │    cmpq    %rcx, RIP+8(%rsp)
>>      0.00 │   ┌──cmp    %rcx,0x88(%rsp)
>>           │   │je      .Lerror_bad_iret
>>      0.14 │   ├──je     b4
>>           │   │movl    %ecx, %eax
>>
>> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
>> ---
>>   tools/perf/ui/browser.c           | 33 ++++++++++++++++++++++---------
>>   tools/perf/ui/browser.h           |  2 +-
>>   tools/perf/ui/browsers/annotate.c | 24 +++++++++++++++-------
>>   3 files changed, 42 insertions(+), 17 deletions(-)
>>
>> diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
>> index 781afe42e90e..fa5bd5c20e96 100644
>> --- a/tools/perf/ui/browser.c
>> +++ b/tools/perf/ui/browser.c
>> @@ -757,25 +757,40 @@ void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
>>   }
>>   
>>   void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
>> -			    unsigned int row, bool arrow_down)
>> +			    unsigned int row, int diff, bool arrow_down)
>>   {
>> -	unsigned int end_row;
>> +	int end_row;
>>   
>> -	if (row >= browser->top_idx)
>> -		end_row = row - browser->top_idx;
>> -	else
>> +	if (diff <= 0)
>>   		return;
>>   
>>   	SLsmg_set_char_set(1);
>>   
>>   	if (arrow_down) {
>> +		if (row + diff <= browser->top_idx)
>> +			return;
>> +
>> +		end_row = row + diff - browser->top_idx;
>>   		ui_browser__gotorc(browser, end_row, column - 1);
>> -		SLsmg_write_char(SLSMG_ULCORN_CHAR);
>> -		ui_browser__gotorc(browser, end_row, column);
>> -		SLsmg_draw_hline(2);
>> -		ui_browser__gotorc(browser, end_row + 1, column - 1);
>>   		SLsmg_write_char(SLSMG_LTEE_CHAR);
>> +
>> +		while (--end_row >= 0 && end_row > (int)(row - browser->top_idx)) {
>> +			ui_browser__gotorc(browser, end_row, column - 1);
>> +			SLsmg_draw_vline(1);
>> +		}
>> +
>> +		end_row = (int)(row - browser->top_idx);
>> +		if (end_row >= 0) {
>> +			ui_browser__gotorc(browser, end_row, column - 1);
>> +			SLsmg_write_char(SLSMG_ULCORN_CHAR);
>> +			ui_browser__gotorc(browser, end_row, column);
>> +			SLsmg_draw_hline(2);
>> +		}
>>   	} else {
>> +		if (row < browser->top_idx)
>> +			return;
>> +
>> +		end_row = row - browser->top_idx;
>>   		ui_browser__gotorc(browser, end_row, column - 1);
>>   		SLsmg_write_char(SLSMG_LTEE_CHAR);
>>   		ui_browser__gotorc(browser, end_row, column);
>> diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
>> index 3678eb88f119..510ce4554050 100644
>> --- a/tools/perf/ui/browser.h
>> +++ b/tools/perf/ui/browser.h
>> @@ -51,7 +51,7 @@ void ui_browser__write_graph(struct ui_browser *browser, int graph);
>>   void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
>>   			      u64 start, u64 end);
>>   void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
>> -			    unsigned int row, bool arrow_down);
>> +			    unsigned int row, int diff, bool arrow_down);
>>   void __ui_browser__show_title(struct ui_browser *browser, const char *title);
>>   void ui_browser__show_title(struct ui_browser *browser, const char *title);
>>   int ui_browser__show(struct ui_browser *browser, const char *title,
>> diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
>> index ef4da4295bf7..e81c2493efdf 100644
>> --- a/tools/perf/ui/browsers/annotate.c
>> +++ b/tools/perf/ui/browsers/annotate.c
>> @@ -125,13 +125,20 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
>>   		ab->selection = al;
>>   }
>>   
>> -static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
>> +static int is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
>>   {
>>   	struct disasm_line *pos = list_prev_entry(cursor, al.node);
>>   	const char *name;
>> +	int diff = 1;
>> +
>> +	while (pos && pos->al.offset == -1) {
>> +		pos = list_prev_entry(pos, al.node);
>> +		if (!ab->opts->hide_src_code)
>> +			diff++;
>> +	}
>>   
>>   	if (!pos)
>> -		return false;
>> +		return 0;
>>   
>>   	if (ins__is_lock(&pos->ins))
>>   		name = pos->ops.locked.ins.name;
>> @@ -139,9 +146,11 @@ static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
>>   		name = pos->ins.name;
>>   
>>   	if (!name || !cursor->ins.name)
>> -		return false;
>> +		return 0;
>>   
>> -	return ins__is_fused(ab->arch, name, cursor->ins.name);
>> +	if (ins__is_fused(ab->arch, name, cursor->ins.name))
>> +		return diff;
>> +	return 0;
>>   }
>>   
>>   static void annotate_browser__draw_current_jump(struct ui_browser *browser)
>> @@ -155,6 +164,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
>>   	struct annotation *notes = symbol__annotation(sym);
>>   	u8 pcnt_width = annotation__pcnt_width(notes);
>>   	int width;
>> +	int diff = 0;
>>   
>>   	/* PLT symbols contain external offsets */
>>   	if (strstr(sym->name, "@plt"))
>> @@ -205,11 +215,11 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
>>   				 pcnt_width + 2 + notes->widths.addr + width,
>>   				 from, to);
>>   
>> -	if (is_fused(ab, cursor)) {
>> +	diff = is_fused(ab, cursor);
>> +	if (diff > 0) {
>>   		ui_browser__mark_fused(browser,
>>   				       pcnt_width + 3 + notes->widths.addr + width,
>> -				       from - 1,
>> -				       to > from);
>> +				       from - diff, diff, to > from);
>>   	}
>>   }
>>   
>> -- 
>> 2.27.0
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/2] perf annotate: Fix fused instr logic for assembly functions
  2021-09-13  1:54   ` Jin, Yao
@ 2021-09-13 19:53     ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 5+ messages in thread
From: Arnaldo Carvalho de Melo @ 2021-09-13 19:53 UTC (permalink / raw)
  To: Jin, Yao
  Cc: Ravi Bangoria, mark.rutland, alexander.shishkin, jolsa, namhyung,
	kim.phillips, linux-perf-users, linux-kernel

Em Mon, Sep 13, 2021 at 09:54:00AM +0800, Jin, Yao escreveu:
> Hi Arnaldo, Ravi
> 
> On 9/12/2021 3:03 AM, Arnaldo Carvalho de Melo wrote:
> > Em Sat, Sep 11, 2021 at 10:08:53AM +0530, Ravi Bangoria escreveu:
> > > Some x86 microarchitectures fuse a subset of cmp/test/ALU instructions
> > > with branch instructions, and thus perf annotate highlight such valid
> > > pairs as fused.
> > 
> > Jin, are you ok with this? Can I have your reviewed-by?
> > 
> > - Arnaldo
> 
> Oh, my original patch could only handle the case like:
> 
> cmp xxx
> je  aaa
> 
> But it didn't consider Ravi's case something like:
> 
> cmp xxx
> cmp yyy
> je  aaa
> je  bbb
> 
> Thanks for Ravi fixing this issue! Backward searching is probably a better solution.
> 
> Frankly I can't reproduce Ravi's case, but for my test suite, Ravi's patch works as well.
> 
> Reviewed-by: Jin Yao <yao.jin@linux.intel.com>

Thanks, applied.

- Arnaldo


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-09-13 19:53 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-11  4:38 [PATCH v2 1/2] perf annotate: Fix fused instr logic for assembly functions Ravi Bangoria
2021-09-11  4:38 ` [PATCH v2 2/2] perf annotate: Add fusion logic for AMD microarchs Ravi Bangoria
2021-09-11 19:03 ` [PATCH v2 1/2] perf annotate: Fix fused instr logic for assembly functions Arnaldo Carvalho de Melo
2021-09-13  1:54   ` Jin, Yao
2021-09-13 19:53     ` Arnaldo Carvalho de Melo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).