LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH v3] perf mem: enable sampling loads and stores simultaneously
@ 2014-12-17 15:23 Stephane Eranian
  2014-12-17 16:07 ` Arnaldo Carvalho de Melo
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Stephane Eranian @ 2014-12-17 15:23 UTC (permalink / raw)
  To: linux-kernel
  Cc: peterz, mingo, ak, jolsa, acme, dsahern, dzickus, rfowles,
	jmario, namhyung


This patch modifies perf mem to default to sampling loads
and stores simultaneously. Previously, it could only do one or
the other, even though no hardware restriction prevented
simultaneous collection. With this patch, one run is sufficient
to collect both.

It is still possible to sample only loads or stores by using the
-t option:
 $ perf mem -t load rec
 $ perf mem -t load rep
Or
 $ perf mem -t store rec
 $ perf mem -t store rep

The perf report TUI will show one event at a time. The store
output will contain a Weight column which will be empty.

In V2, we updated the man pages to reflect the change and
also simplified the initialization of the argv vector passed
to the cmd_*() functions as per LKML feedback.

In V3, we fixed typos in the changelog.

Signed-off-by: Stephane Eranian <eranian@google.com>
---
 tools/perf/Documentation/perf-mem.txt |   9 +--
 tools/perf/builtin-mem.c              | 123 +++++++++++++++++++++++++++-------
 2 files changed, 105 insertions(+), 27 deletions(-)

diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 1d78a40..43310d8 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -12,11 +12,12 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-"perf mem -t <TYPE> record" runs a command and gathers memory operation data
+"perf mem record" runs a command and gathers memory operation data
 from it, into perf.data. Perf record options are accepted and are passed through.
 
-"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
-right set of options to display a memory access profile.
+"perf mem report" displays the result. It invokes perf report with the
+right set of options to display a memory access profile. By default, loads
+and stores are sampled. Use the -t option to limit to loads or stores.
 
 Note that on Intel systems the memory latency reported is the use-latency,
 not the pure load (or store latency). Use latency includes any pipeline
@@ -29,7 +30,7 @@ OPTIONS
 
 -t::
 --type=::
-	Select the memory operation type: load or store (default: load)
+	Select the memory operation type: load or store (default: load,store)
 
 -D::
 --dump-raw-samples=::
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 24db6ff..1eded0a 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -7,10 +7,13 @@
 #include "util/session.h"
 #include "util/data.h"
 
-#define MEM_OPERATION_LOAD	"load"
-#define MEM_OPERATION_STORE	"store"
+#define MEM_OPERATION_LOAD	0x1
+#define MEM_OPERATION_STORE	0x2
 
-static const char	*mem_operation		= MEM_OPERATION_LOAD;
+/*
+ * default to both load an store sampling
+ */
+static int mem_operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE;
 
 struct perf_mem {
 	struct perf_tool	tool;
@@ -25,26 +28,30 @@ static int __cmd_record(int argc, const char **argv)
 {
 	int rec_argc, i = 0, j;
 	const char **rec_argv;
-	char event[64];
 	int ret;
 
-	rec_argc = argc + 4;
+	rec_argc = argc + 7; /* max number of arguments */
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 	if (!rec_argv)
 		return -1;
 
-	rec_argv[i++] = strdup("record");
-	if (!strcmp(mem_operation, MEM_OPERATION_LOAD))
-		rec_argv[i++] = strdup("-W");
-	rec_argv[i++] = strdup("-d");
-	rec_argv[i++] = strdup("-e");
+	rec_argv[i++] = "record";
 
-	if (strcmp(mem_operation, MEM_OPERATION_LOAD))
-		sprintf(event, "cpu/mem-stores/pp");
-	else
-		sprintf(event, "cpu/mem-loads/pp");
+	if (mem_operation & MEM_OPERATION_LOAD)
+		rec_argv[i++] = "-W";
+
+	rec_argv[i++] = "-d";
+
+	if (mem_operation & MEM_OPERATION_LOAD) {
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = "cpu/mem-loads/pp";
+	}
+
+	if (mem_operation & MEM_OPERATION_STORE) {
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = "cpu/mem-stores/pp";
+	}
 
-	rec_argv[i++] = strdup(event);
 	for (j = 1; j < argc; j++, i++)
 		rec_argv[i] = argv[j];
 
@@ -162,17 +169,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
 	if (!rep_argv)
 		return -1;
 
-	rep_argv[i++] = strdup("report");
-	rep_argv[i++] = strdup("--mem-mode");
-	rep_argv[i++] = strdup("-n"); /* display number of samples */
+	rep_argv[i++] = "report";
+	rep_argv[i++] = "--mem-mode";
+	rep_argv[i++] = "-n"; /* display number of samples */
 
 	/*
 	 * there is no weight (cost) associated with stores, so don't print
 	 * the column
 	 */
-	if (strcmp(mem_operation, MEM_OPERATION_LOAD))
-		rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr,"
-				       "dso_daddr,tlb,locked");
+	if (!(mem_operation & MEM_OPERATION_LOAD))
+		rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
+				"dso_daddr,tlb,locked";
 
 	for (j = 1; j < argc; j++, i++)
 		rep_argv[i] = argv[j];
@@ -182,6 +189,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
 	return ret;
 }
 
+struct mem_mode {
+	const char *name;
+	int mode;
+};
+
+#define MEM_OPT(n, m) \
+	{ .name = n, .mode = (m) }
+
+#define MEM_END { .name = NULL }
+
+static const struct mem_mode mem_modes[]={
+	MEM_OPT("load", MEM_OPERATION_LOAD),
+	MEM_OPT("store", MEM_OPERATION_STORE),
+	MEM_END
+};
+
+static int
+parse_mem_ops(const struct option *opt, const char *str, int unset)
+{
+	int *mode = (int *)opt->value;
+	const struct mem_mode *m;
+	char *s, *os = NULL, *p;
+	int ret = -1;
+
+	if (unset)
+		return 0;
+
+	/* str may be NULL in case no arg is passed to -t */
+	if (str) {
+		/* because str is read-only */
+		s = os = strdup(str);
+		if (!s)
+			return -1;
+
+		/* reset mode */
+		*mode = 0;
+
+		for (;;) {
+			p = strchr(s, ',');
+			if (p)
+				*p = '\0';
+
+			for (m = mem_modes; m->name; m++) {
+				if (!strcasecmp(s, m->name))
+					break;
+			}
+			if (!m->name) {
+				fprintf(stderr, "unknown sampling op %s,"
+					    " check man page\n", s);
+				goto error;
+			}
+
+			*mode |= m->mode;
+
+			if (!p)
+				break;
+
+			s = p + 1;
+		}
+	}
+	ret = 0;
+
+	if (*mode == 0)
+		*mode = MEM_OPERATION_LOAD;
+error:
+	free(os);
+	return ret;
+}
+
 int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	struct stat st;
@@ -199,8 +275,9 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
 		.input_name		 = "perf.data",
 	};
 	const struct option mem_options[] = {
-	OPT_STRING('t', "type", &mem_operation,
-		   "type", "memory operations(load/store)"),
+	OPT_CALLBACK('t', "type", &mem_operation,
+		   "type", "memory operations(load,store) Default load,store",
+		    parse_mem_ops),
 	OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
 		    "dump raw samples in ASCII"),
 	OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
-- 
1.9.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] perf mem: enable sampling loads and stores simultaneously
  2014-12-17 15:23 [PATCH v3] perf mem: enable sampling loads and stores simultaneously Stephane Eranian
@ 2014-12-17 16:07 ` Arnaldo Carvalho de Melo
  2014-12-17 16:38   ` Stephane Eranian
  2014-12-18  4:52 ` Namhyung Kim
  2015-01-28 15:03 ` [tip:perf/core] perf mem: Enable " tip-bot for Stephane Eranian
  2 siblings, 1 reply; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2014-12-17 16:07 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, peterz, mingo, ak, jolsa, dsahern, dzickus,
	rfowles, jmario, namhyung

[-- Attachment #1: Type: text/plain, Size: 7838 bytes --]

Em Wed, Dec 17, 2014 at 04:23:55PM +0100, Stephane Eranian escreveu:
> 
> This patch modifies perf mem to default to sampling loads
> and stores simultaneously. It could only do one or the other
> before yet there was no hardware restriction preventing
> simultaneous collection. With this patch, one run is sufficient
> to collect both.
> 
> It is still possible to sample only loads or stores by using the
> -t option:
>  $ perf mem -t load rec
>  $ perf mem -t load rep
> Or
>  $ perf mem -t store rec
>  $ perf mem -t store rep
> 
> The perf report TUI will show one event at a time. The store
> output will contain a Weight column which will be empty.
> 
> In V2, we updated the man pages to reflect the change and
> also simplify the initialization of the argv vector passed
> to the cmd_*() functions as per LKML feedback.
> 
> In V3, we fixed typos in the changelog.
> 
> Signed-off-by: Stephane Eranian <eranian@google.com>

So here goes a v4, with just a minor change to avoid adding another global
variable and to make this operation parameter a member of struct perf_mem,
like the other parameters. Attached is the interdiff from your patch to
this one, ok?

diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 1d78a4064da4..43310d8661fe 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -12,11 +12,12 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-"perf mem -t <TYPE> record" runs a command and gathers memory operation data
+"perf mem record" runs a command and gathers memory operation data
 from it, into perf.data. Perf record options are accepted and are passed through.
 
-"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
-right set of options to display a memory access profile.
+"perf mem report" displays the result. It invokes perf report with the
+right set of options to display a memory access profile. By default, loads
+and stores are sampled. Use the -t option to limit to loads or stores.
 
 Note that on Intel systems the memory latency reported is the use-latency,
 not the pure load (or store latency). Use latency includes any pipeline
@@ -29,7 +30,7 @@ OPTIONS
 
 -t::
 --type=::
-	Select the memory operation type: load or store (default: load)
+	Select the memory operation type: load or store (default: load,store)
 
 -D::
 --dump-raw-samples=::
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 24db6ffe2957..9b5663950a4d 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -7,44 +7,47 @@
 #include "util/session.h"
 #include "util/data.h"
 
-#define MEM_OPERATION_LOAD	"load"
-#define MEM_OPERATION_STORE	"store"
-
-static const char	*mem_operation		= MEM_OPERATION_LOAD;
+#define MEM_OPERATION_LOAD	0x1
+#define MEM_OPERATION_STORE	0x2
 
 struct perf_mem {
 	struct perf_tool	tool;
 	char const		*input_name;
 	bool			hide_unresolved;
 	bool			dump_raw;
+	int			operation;
 	const char		*cpu_list;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
-static int __cmd_record(int argc, const char **argv)
+static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 {
 	int rec_argc, i = 0, j;
 	const char **rec_argv;
-	char event[64];
 	int ret;
 
-	rec_argc = argc + 4;
+	rec_argc = argc + 7; /* max number of arguments */
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 	if (!rec_argv)
 		return -1;
 
-	rec_argv[i++] = strdup("record");
-	if (!strcmp(mem_operation, MEM_OPERATION_LOAD))
-		rec_argv[i++] = strdup("-W");
-	rec_argv[i++] = strdup("-d");
-	rec_argv[i++] = strdup("-e");
+	rec_argv[i++] = "record";
 
-	if (strcmp(mem_operation, MEM_OPERATION_LOAD))
-		sprintf(event, "cpu/mem-stores/pp");
-	else
-		sprintf(event, "cpu/mem-loads/pp");
+	if (mem->operation & MEM_OPERATION_LOAD)
+		rec_argv[i++] = "-W";
+
+	rec_argv[i++] = "-d";
+
+	if (mem->operation & MEM_OPERATION_LOAD) {
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = "cpu/mem-loads/pp";
+	}
+
+	if (mem->operation & MEM_OPERATION_STORE) {
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = "cpu/mem-stores/pp";
+	}
 
-	rec_argv[i++] = strdup(event);
 	for (j = 1; j < argc; j++, i++)
 		rec_argv[i] = argv[j];
 
@@ -162,17 +165,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
 	if (!rep_argv)
 		return -1;
 
-	rep_argv[i++] = strdup("report");
-	rep_argv[i++] = strdup("--mem-mode");
-	rep_argv[i++] = strdup("-n"); /* display number of samples */
+	rep_argv[i++] = "report";
+	rep_argv[i++] = "--mem-mode";
+	rep_argv[i++] = "-n"; /* display number of samples */
 
 	/*
 	 * there is no weight (cost) associated with stores, so don't print
 	 * the column
 	 */
-	if (strcmp(mem_operation, MEM_OPERATION_LOAD))
-		rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr,"
-				       "dso_daddr,tlb,locked");
+	if (!(mem->operation & MEM_OPERATION_LOAD))
+		rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
+				"dso_daddr,tlb,locked";
 
 	for (j = 1; j < argc; j++, i++)
 		rep_argv[i] = argv[j];
@@ -182,6 +185,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
 	return ret;
 }
 
+struct mem_mode {
+	const char *name;
+	int mode;
+};
+
+#define MEM_OPT(n, m) \
+	{ .name = n, .mode = (m) }
+
+#define MEM_END { .name = NULL }
+
+static const struct mem_mode mem_modes[]={
+	MEM_OPT("load", MEM_OPERATION_LOAD),
+	MEM_OPT("store", MEM_OPERATION_STORE),
+	MEM_END
+};
+
+static int
+parse_mem_ops(const struct option *opt, const char *str, int unset)
+{
+	int *mode = (int *)opt->value;
+	const struct mem_mode *m;
+	char *s, *os = NULL, *p;
+	int ret = -1;
+
+	if (unset)
+		return 0;
+
+	/* str may be NULL in case no arg is passed to -t */
+	if (str) {
+		/* because str is read-only */
+		s = os = strdup(str);
+		if (!s)
+			return -1;
+
+		/* reset mode */
+		*mode = 0;
+
+		for (;;) {
+			p = strchr(s, ',');
+			if (p)
+				*p = '\0';
+
+			for (m = mem_modes; m->name; m++) {
+				if (!strcasecmp(s, m->name))
+					break;
+			}
+			if (!m->name) {
+				fprintf(stderr, "unknown sampling op %s,"
+					    " check man page\n", s);
+				goto error;
+			}
+
+			*mode |= m->mode;
+
+			if (!p)
+				break;
+
+			s = p + 1;
+		}
+	}
+	ret = 0;
+
+	if (*mode == 0)
+		*mode = MEM_OPERATION_LOAD;
+error:
+	free(os);
+	return ret;
+}
+
 int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	struct stat st;
@@ -197,10 +269,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
 			.ordered_events	= true,
 		},
 		.input_name		 = "perf.data",
+		/*
+		 * default to both load an store sampling
+		 */
+		.operation		 = MEM_OPERATION_LOAD | MEM_OPERATION_STORE,
 	};
 	const struct option mem_options[] = {
-	OPT_STRING('t', "type", &mem_operation,
-		   "type", "memory operations(load/store)"),
+	OPT_CALLBACK('t', "type", &mem.operation,
+		   "type", "memory operations(load,store) Default load,store",
+		    parse_mem_ops),
 	OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
 		    "dump raw samples in ASCII"),
 	OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
@@ -225,7 +302,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
 	argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
 					mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
 
-	if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation))
+	if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation))
 		usage_with_options(mem_usage, mem_options);
 
 	if (!mem.input_name || !strlen(mem.input_name)) {
@@ -236,7 +313,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
 	}
 
 	if (!strncmp(argv[0], "rec", 3))
-		return __cmd_record(argc, argv);
+		return __cmd_record(argc, argv, &mem);
 	else if (!strncmp(argv[0], "rep", 3))
 		return report_events(argc, argv, &mem);
 	else

[-- Attachment #2: interdiff.patch --]
[-- Type: text/plain, Size: 2608 bytes --]

diff -u b/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
--- b/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -10,21 +10,17 @@
 #define MEM_OPERATION_LOAD	0x1
 #define MEM_OPERATION_STORE	0x2
 
-/*
- * default to both load an store sampling
- */
-static int mem_operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE;
-
 struct perf_mem {
 	struct perf_tool	tool;
 	char const		*input_name;
 	bool			hide_unresolved;
 	bool			dump_raw;
+	int			operation;
 	const char		*cpu_list;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
-static int __cmd_record(int argc, const char **argv)
+static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 {
 	int rec_argc, i = 0, j;
 	const char **rec_argv;
@@ -37,17 +33,17 @@
 
 	rec_argv[i++] = "record";
 
-	if (mem_operation & MEM_OPERATION_LOAD)
+	if (mem->operation & MEM_OPERATION_LOAD)
 		rec_argv[i++] = "-W";
 
 	rec_argv[i++] = "-d";
 
-	if (mem_operation & MEM_OPERATION_LOAD) {
+	if (mem->operation & MEM_OPERATION_LOAD) {
 		rec_argv[i++] = "-e";
 		rec_argv[i++] = "cpu/mem-loads/pp";
 	}
 
-	if (mem_operation & MEM_OPERATION_STORE) {
+	if (mem->operation & MEM_OPERATION_STORE) {
 		rec_argv[i++] = "-e";
 		rec_argv[i++] = "cpu/mem-stores/pp";
 	}
@@ -177,7 +173,7 @@
 	 * there is no weight (cost) associated with stores, so don't print
 	 * the column
 	 */
-	if (!(mem_operation & MEM_OPERATION_LOAD))
+	if (!(mem->operation & MEM_OPERATION_LOAD))
 		rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
 				"dso_daddr,tlb,locked";
 
@@ -273,9 +269,13 @@
 			.ordered_events	= true,
 		},
 		.input_name		 = "perf.data",
+		/*
+		 * default to both load an store sampling
+		 */
+		.operation		 = MEM_OPERATION_LOAD | MEM_OPERATION_STORE,
 	};
 	const struct option mem_options[] = {
-	OPT_CALLBACK('t', "type", &mem_operation,
+	OPT_CALLBACK('t', "type", &mem.operation,
 		   "type", "memory operations(load,store) Default load,store",
 		    parse_mem_ops),
 	OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
@@ -302,7 +302,7 @@
 	argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
 					mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
 
-	if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation))
+	if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation))
 		usage_with_options(mem_usage, mem_options);
 
 	if (!mem.input_name || !strlen(mem.input_name)) {
@@ -313,7 +313,7 @@
 	}
 
 	if (!strncmp(argv[0], "rec", 3))
-		return __cmd_record(argc, argv);
+		return __cmd_record(argc, argv, &mem);
 	else if (!strncmp(argv[0], "rep", 3))
 		return report_events(argc, argv, &mem);
 	else

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] perf mem: enable sampling loads and stores simultaneously
  2014-12-17 16:07 ` Arnaldo Carvalho de Melo
@ 2014-12-17 16:38   ` Stephane Eranian
  2014-12-17 16:49     ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 6+ messages in thread
From: Stephane Eranian @ 2014-12-17 16:38 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: LKML, Peter Zijlstra, mingo, ak, Jiri Olsa, David Ahern,
	Don Zickus, Richard Fowles, Joe Mario, Namhyung Kim

On Wed, Dec 17, 2014 at 11:07 AM, Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
> Em Wed, Dec 17, 2014 at 04:23:55PM +0100, Stephane Eranian escreveu:
>>
>> This patch modifies perf mem to default to sampling loads
>> and stores simultaneously. It could only do one or the other
>> before yet there was no hardware restriction preventing
>> simultaneous collection. With this patch, one run is sufficient
>> to collect both.
>>
>> It is still possible to sample only loads or stores by using the
>> -t option:
>>  $ perf mem -t load rec
>>  $ perf mem -t load rep
>> Or
>>  $ perf mem -t store rec
>>  $ perf mem -t store rep
>>
>> The perf report TUI will show one event at a time. The store
>> output will contain a Weight column which will be empty.
>>
>> In V2, we updated the man pages to reflect the change and
>> also simplify the initialization of the argv vector passed
>> to the cmd_*() functions as per LKML feedback.
>>
>> In V3, we fixed typos in the changelog.
>>
>> Signed-off-by: Stephane Eranian <eranian@google.com>
>
> So here it goes a v4, just a minor change to avoid adding another global
> variable and make this operation parm like the other parms inside that
> struct perf_mem, attached goes the interdiff from your patch to this one
> here, ok?
>
So you want me to post V4 with this one? I am fine if you add yours
directly on top of mine.

Acked-by: Stephane Eranian <eranian@google.com>

> diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
> index 1d78a4064da4..43310d8661fe 100644
> --- a/tools/perf/Documentation/perf-mem.txt
> +++ b/tools/perf/Documentation/perf-mem.txt
> @@ -12,11 +12,12 @@ SYNOPSIS
>
>  DESCRIPTION
>  -----------
> -"perf mem -t <TYPE> record" runs a command and gathers memory operation data
> +"perf mem record" runs a command and gathers memory operation data
>  from it, into perf.data. Perf record options are accepted and are passed through.
>
> -"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
> -right set of options to display a memory access profile.
> +"perf mem report" displays the result. It invokes perf report with the
> +right set of options to display a memory access profile. By default, loads
> +and stores are sampled. Use the -t option to limit to loads or stores.
>
>  Note that on Intel systems the memory latency reported is the use-latency,
>  not the pure load (or store latency). Use latency includes any pipeline
> @@ -29,7 +30,7 @@ OPTIONS
>
>  -t::
>  --type=::
> -       Select the memory operation type: load or store (default: load)
> +       Select the memory operation type: load or store (default: load,store)
>
>  -D::
>  --dump-raw-samples=::
> diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
> index 24db6ffe2957..9b5663950a4d 100644
> --- a/tools/perf/builtin-mem.c
> +++ b/tools/perf/builtin-mem.c
> @@ -7,44 +7,47 @@
>  #include "util/session.h"
>  #include "util/data.h"
>
> -#define MEM_OPERATION_LOAD     "load"
> -#define MEM_OPERATION_STORE    "store"
> -
> -static const char      *mem_operation          = MEM_OPERATION_LOAD;
> +#define MEM_OPERATION_LOAD     0x1
> +#define MEM_OPERATION_STORE    0x2
>
>  struct perf_mem {
>         struct perf_tool        tool;
>         char const              *input_name;
>         bool                    hide_unresolved;
>         bool                    dump_raw;
> +       int                     operation;
>         const char              *cpu_list;
>         DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
>  };
>
> -static int __cmd_record(int argc, const char **argv)
> +static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
>  {
>         int rec_argc, i = 0, j;
>         const char **rec_argv;
> -       char event[64];
>         int ret;
>
> -       rec_argc = argc + 4;
> +       rec_argc = argc + 7; /* max number of arguments */
>         rec_argv = calloc(rec_argc + 1, sizeof(char *));
>         if (!rec_argv)
>                 return -1;
>
> -       rec_argv[i++] = strdup("record");
> -       if (!strcmp(mem_operation, MEM_OPERATION_LOAD))
> -               rec_argv[i++] = strdup("-W");
> -       rec_argv[i++] = strdup("-d");
> -       rec_argv[i++] = strdup("-e");
> +       rec_argv[i++] = "record";
>
> -       if (strcmp(mem_operation, MEM_OPERATION_LOAD))
> -               sprintf(event, "cpu/mem-stores/pp");
> -       else
> -               sprintf(event, "cpu/mem-loads/pp");
> +       if (mem->operation & MEM_OPERATION_LOAD)
> +               rec_argv[i++] = "-W";
> +
> +       rec_argv[i++] = "-d";
> +
> +       if (mem->operation & MEM_OPERATION_LOAD) {
> +               rec_argv[i++] = "-e";
> +               rec_argv[i++] = "cpu/mem-loads/pp";
> +       }
> +
> +       if (mem->operation & MEM_OPERATION_STORE) {
> +               rec_argv[i++] = "-e";
> +               rec_argv[i++] = "cpu/mem-stores/pp";
> +       }
>
> -       rec_argv[i++] = strdup(event);
>         for (j = 1; j < argc; j++, i++)
>                 rec_argv[i] = argv[j];
>
> @@ -162,17 +165,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
>         if (!rep_argv)
>                 return -1;
>
> -       rep_argv[i++] = strdup("report");
> -       rep_argv[i++] = strdup("--mem-mode");
> -       rep_argv[i++] = strdup("-n"); /* display number of samples */
> +       rep_argv[i++] = "report";
> +       rep_argv[i++] = "--mem-mode";
> +       rep_argv[i++] = "-n"; /* display number of samples */
>
>         /*
>          * there is no weight (cost) associated with stores, so don't print
>          * the column
>          */
> -       if (strcmp(mem_operation, MEM_OPERATION_LOAD))
> -               rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr,"
> -                                      "dso_daddr,tlb,locked");
> +       if (!(mem->operation & MEM_OPERATION_LOAD))
> +               rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
> +                               "dso_daddr,tlb,locked";
>
>         for (j = 1; j < argc; j++, i++)
>                 rep_argv[i] = argv[j];
> @@ -182,6 +185,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
>         return ret;
>  }
>
> +struct mem_mode {
> +       const char *name;
> +       int mode;
> +};
> +
> +#define MEM_OPT(n, m) \
> +       { .name = n, .mode = (m) }
> +
> +#define MEM_END { .name = NULL }
> +
> +static const struct mem_mode mem_modes[]={
> +       MEM_OPT("load", MEM_OPERATION_LOAD),
> +       MEM_OPT("store", MEM_OPERATION_STORE),
> +       MEM_END
> +};
> +
> +static int
> +parse_mem_ops(const struct option *opt, const char *str, int unset)
> +{
> +       int *mode = (int *)opt->value;
> +       const struct mem_mode *m;
> +       char *s, *os = NULL, *p;
> +       int ret = -1;
> +
> +       if (unset)
> +               return 0;
> +
> +       /* str may be NULL in case no arg is passed to -t */
> +       if (str) {
> +               /* because str is read-only */
> +               s = os = strdup(str);
> +               if (!s)
> +                       return -1;
> +
> +               /* reset mode */
> +               *mode = 0;
> +
> +               for (;;) {
> +                       p = strchr(s, ',');
> +                       if (p)
> +                               *p = '\0';
> +
> +                       for (m = mem_modes; m->name; m++) {
> +                               if (!strcasecmp(s, m->name))
> +                                       break;
> +                       }
> +                       if (!m->name) {
> +                               fprintf(stderr, "unknown sampling op %s,"
> +                                           " check man page\n", s);
> +                               goto error;
> +                       }
> +
> +                       *mode |= m->mode;
> +
> +                       if (!p)
> +                               break;
> +
> +                       s = p + 1;
> +               }
> +       }
> +       ret = 0;
> +
> +       if (*mode == 0)
> +               *mode = MEM_OPERATION_LOAD;
> +error:
> +       free(os);
> +       return ret;
> +}
> +
>  int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
>  {
>         struct stat st;
> @@ -197,10 +269,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
>                         .ordered_events = true,
>                 },
>                 .input_name              = "perf.data",
> +               /*
> +                * default to both load an store sampling
> +                */
> +               .operation               = MEM_OPERATION_LOAD | MEM_OPERATION_STORE,
>         };
>         const struct option mem_options[] = {
> -       OPT_STRING('t', "type", &mem_operation,
> -                  "type", "memory operations(load/store)"),
> +       OPT_CALLBACK('t', "type", &mem.operation,
> +                  "type", "memory operations(load,store) Default load,store",
> +                   parse_mem_ops),
>         OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
>                     "dump raw samples in ASCII"),
>         OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
> @@ -225,7 +302,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
>         argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
>                                         mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
>
> -       if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation))
> +       if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation))
>                 usage_with_options(mem_usage, mem_options);
>
>         if (!mem.input_name || !strlen(mem.input_name)) {
> @@ -236,7 +313,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
>         }
>
>         if (!strncmp(argv[0], "rec", 3))
> -               return __cmd_record(argc, argv);
> +               return __cmd_record(argc, argv, &mem);
>         else if (!strncmp(argv[0], "rep", 3))
>                 return report_events(argc, argv, &mem);
>         else

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] perf mem: enable sampling loads and stores simultaneously
  2014-12-17 16:38   ` Stephane Eranian
@ 2014-12-17 16:49     ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2014-12-17 16:49 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: LKML, Peter Zijlstra, mingo, ak, Jiri Olsa, David Ahern,
	Don Zickus, Richard Fowles, Joe Mario, Namhyung Kim

Em Wed, Dec 17, 2014 at 11:38:11AM -0500, Stephane Eranian escreveu:
> On Wed, Dec 17, 2014 at 11:07 AM, Arnaldo Carvalho de Melo
> <acme@kernel.org> wrote:
> > Em Wed, Dec 17, 2014 at 04:23:55PM +0100, Stephane Eranian escreveu:
> >>
> >> This patch modifies perf mem to default to sampling loads
> >> and stores simultaneously. It could only do one or the other
> >> before yet there was no hardware restriction preventing
> >> simultaneous collection. With this patch, one run is sufficient
> >> to collect both.
> >>
> >> It is still possible to sample only loads or stores by using the
> >> -t option:
> >>  $ perf mem -t load rec
> >>  $ perf mem -t load rep
> >> Or
> >>  $ perf mem -t store rec
> >>  $ perf mem -t store rep
> >>
> >> The perf report TUI will show one event at a time. The store
> >> output will contain a Weight column which will be empty.
> >>
> >> In V2, we updated the man pages to reflect the change and
> >> also simplify the initialization of the argv vector passed
> >> to the cmd_*() functions as per LKML feedback.
> >>
> >> In V3, we fixed typos in the changelog.
> >>
> >> Signed-off-by: Stephane Eranian <eranian@google.com>
> >
> > > So here goes a v4, with just a minor change to avoid adding another global
> > > variable and to make this operation parm like the other parms inside
> > > struct perf_mem. Attached is the interdiff from your patch to this one,
> > > ok?

> So you want me to post V4 with this one? I am fine if you add yours
> directly on top of mine.

Ok, then I'll do it + your ack, thanks for checking!

- Arnaldo
 
> Acked-by: Stephane Eranian <eranian@google.com>
> 
> > diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
> > index 1d78a4064da4..43310d8661fe 100644
> > --- a/tools/perf/Documentation/perf-mem.txt
> > +++ b/tools/perf/Documentation/perf-mem.txt
> > @@ -12,11 +12,12 @@ SYNOPSIS
> >
> >  DESCRIPTION
> >  -----------
> > -"perf mem -t <TYPE> record" runs a command and gathers memory operation data
> > +"perf mem record" runs a command and gathers memory operation data
> >  from it, into perf.data. Perf record options are accepted and are passed through.
> >
> > -"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
> > -right set of options to display a memory access profile.
> > +"perf mem report" displays the result. It invokes perf report with the
> > +right set of options to display a memory access profile. By default, loads
> > +and stores are sampled. Use the -t option to limit to loads or stores.
> >
> >  Note that on Intel systems the memory latency reported is the use-latency,
> >  not the pure load (or store latency). Use latency includes any pipeline
> > @@ -29,7 +30,7 @@ OPTIONS
> >
> >  -t::
> >  --type=::
> > -       Select the memory operation type: load or store (default: load)
> > +       Select the memory operation type: load or store (default: load,store)
> >
> >  -D::
> >  --dump-raw-samples=::
> > diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
> > index 24db6ffe2957..9b5663950a4d 100644
> > --- a/tools/perf/builtin-mem.c
> > +++ b/tools/perf/builtin-mem.c
> > @@ -7,44 +7,47 @@
> >  #include "util/session.h"
> >  #include "util/data.h"
> >
> > -#define MEM_OPERATION_LOAD     "load"
> > -#define MEM_OPERATION_STORE    "store"
> > -
> > -static const char      *mem_operation          = MEM_OPERATION_LOAD;
> > +#define MEM_OPERATION_LOAD     0x1
> > +#define MEM_OPERATION_STORE    0x2
> >
> >  struct perf_mem {
> >         struct perf_tool        tool;
> >         char const              *input_name;
> >         bool                    hide_unresolved;
> >         bool                    dump_raw;
> > +       int                     operation;
> >         const char              *cpu_list;
> >         DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
> >  };
> >
> > -static int __cmd_record(int argc, const char **argv)
> > +static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
> >  {
> >         int rec_argc, i = 0, j;
> >         const char **rec_argv;
> > -       char event[64];
> >         int ret;
> >
> > -       rec_argc = argc + 4;
> > +       rec_argc = argc + 7; /* max number of arguments */
> >         rec_argv = calloc(rec_argc + 1, sizeof(char *));
> >         if (!rec_argv)
> >                 return -1;
> >
> > -       rec_argv[i++] = strdup("record");
> > -       if (!strcmp(mem_operation, MEM_OPERATION_LOAD))
> > -               rec_argv[i++] = strdup("-W");
> > -       rec_argv[i++] = strdup("-d");
> > -       rec_argv[i++] = strdup("-e");
> > +       rec_argv[i++] = "record";
> >
> > -       if (strcmp(mem_operation, MEM_OPERATION_LOAD))
> > -               sprintf(event, "cpu/mem-stores/pp");
> > -       else
> > -               sprintf(event, "cpu/mem-loads/pp");
> > +       if (mem->operation & MEM_OPERATION_LOAD)
> > +               rec_argv[i++] = "-W";
> > +
> > +       rec_argv[i++] = "-d";
> > +
> > +       if (mem->operation & MEM_OPERATION_LOAD) {
> > +               rec_argv[i++] = "-e";
> > +               rec_argv[i++] = "cpu/mem-loads/pp";
> > +       }
> > +
> > +       if (mem->operation & MEM_OPERATION_STORE) {
> > +               rec_argv[i++] = "-e";
> > +               rec_argv[i++] = "cpu/mem-stores/pp";
> > +       }
> >
> > -       rec_argv[i++] = strdup(event);
> >         for (j = 1; j < argc; j++, i++)
> >                 rec_argv[i] = argv[j];
> >
> > @@ -162,17 +165,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
> >         if (!rep_argv)
> >                 return -1;
> >
> > -       rep_argv[i++] = strdup("report");
> > -       rep_argv[i++] = strdup("--mem-mode");
> > -       rep_argv[i++] = strdup("-n"); /* display number of samples */
> > +       rep_argv[i++] = "report";
> > +       rep_argv[i++] = "--mem-mode";
> > +       rep_argv[i++] = "-n"; /* display number of samples */
> >
> >         /*
> >          * there is no weight (cost) associated with stores, so don't print
> >          * the column
> >          */
> > -       if (strcmp(mem_operation, MEM_OPERATION_LOAD))
> > -               rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr,"
> > -                                      "dso_daddr,tlb,locked");
> > +       if (!(mem->operation & MEM_OPERATION_LOAD))
> > +               rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
> > +                               "dso_daddr,tlb,locked";
> >
> >         for (j = 1; j < argc; j++, i++)
> >                 rep_argv[i] = argv[j];
> > @@ -182,6 +185,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
> >         return ret;
> >  }
> >
> > +struct mem_mode {
> > +       const char *name;
> > +       int mode;
> > +};
> > +
> > +#define MEM_OPT(n, m) \
> > +       { .name = n, .mode = (m) }
> > +
> > +#define MEM_END { .name = NULL }
> > +
> > +static const struct mem_mode mem_modes[]={
> > +       MEM_OPT("load", MEM_OPERATION_LOAD),
> > +       MEM_OPT("store", MEM_OPERATION_STORE),
> > +       MEM_END
> > +};
> > +
> > +static int
> > +parse_mem_ops(const struct option *opt, const char *str, int unset)
> > +{
> > +       int *mode = (int *)opt->value;
> > +       const struct mem_mode *m;
> > +       char *s, *os = NULL, *p;
> > +       int ret = -1;
> > +
> > +       if (unset)
> > +               return 0;
> > +
> > +       /* str may be NULL in case no arg is passed to -t */
> > +       if (str) {
> > +               /* because str is read-only */
> > +               s = os = strdup(str);
> > +               if (!s)
> > +                       return -1;
> > +
> > +               /* reset mode */
> > +               *mode = 0;
> > +
> > +               for (;;) {
> > +                       p = strchr(s, ',');
> > +                       if (p)
> > +                               *p = '\0';
> > +
> > +                       for (m = mem_modes; m->name; m++) {
> > +                               if (!strcasecmp(s, m->name))
> > +                                       break;
> > +                       }
> > +                       if (!m->name) {
> > +                               fprintf(stderr, "unknown sampling op %s,"
> > +                                           " check man page\n", s);
> > +                               goto error;
> > +                       }
> > +
> > +                       *mode |= m->mode;
> > +
> > +                       if (!p)
> > +                               break;
> > +
> > +                       s = p + 1;
> > +               }
> > +       }
> > +       ret = 0;
> > +
> > +       if (*mode == 0)
> > +               *mode = MEM_OPERATION_LOAD;
> > +error:
> > +       free(os);
> > +       return ret;
> > +}
> > +
> >  int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
> >  {
> >         struct stat st;
> > @@ -197,10 +269,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
> >                         .ordered_events = true,
> >                 },
> >                 .input_name              = "perf.data",
> > +               /*
> > +                * default to both load and store sampling
> > +                */
> > +               .operation               = MEM_OPERATION_LOAD | MEM_OPERATION_STORE,
> >         };
> >         const struct option mem_options[] = {
> > -       OPT_STRING('t', "type", &mem_operation,
> > -                  "type", "memory operations(load/store)"),
> > +       OPT_CALLBACK('t', "type", &mem.operation,
> > +                  "type", "memory operations(load,store) Default load,store",
> > +                   parse_mem_ops),
> >         OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
> >                     "dump raw samples in ASCII"),
> >         OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
> > @@ -225,7 +302,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
> >         argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
> >                                         mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
> >
> > -       if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation))
> > +       if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation))
> >                 usage_with_options(mem_usage, mem_options);
> >
> >         if (!mem.input_name || !strlen(mem.input_name)) {
> > @@ -236,7 +313,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
> >         }
> >
> >         if (!strncmp(argv[0], "rec", 3))
> > -               return __cmd_record(argc, argv);
> > +               return __cmd_record(argc, argv, &mem);
> >         else if (!strncmp(argv[0], "rep", 3))
> >                 return report_events(argc, argv, &mem);
> >         else

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] perf mem: enable sampling loads and stores simultaneously
  2014-12-17 15:23 [PATCH v3] perf mem: enable sampling loads and stores simultaneously Stephane Eranian
  2014-12-17 16:07 ` Arnaldo Carvalho de Melo
@ 2014-12-18  4:52 ` Namhyung Kim
  2015-01-28 15:03 ` [tip:perf/core] perf mem: Enable " tip-bot for Stephane Eranian
  2 siblings, 0 replies; 6+ messages in thread
From: Namhyung Kim @ 2014-12-18  4:52 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, peterz, mingo, ak, jolsa, acme, dsahern, dzickus,
	rfowles, jmario

Hi Stephane,

On Wed, Dec 17, 2014 at 04:23:55PM +0100, Stephane Eranian wrote:
> 
> This patch modifies perf mem to default to sampling loads
> and stores simultaneously. It could only do one or the other
> before yet there was no hardware restriction preventing
> simultaneous collection. With this patch, one run is sufficient
> to collect both.
> 
> It is still possible to sample only loads or stores by using the
> -t option:
>  $ perf mem -t load rec
>  $ perf mem -t load rep
> Or
>  $ perf mem -t store rec
>  $ perf mem -t store rep
> 
> The perf report TUI will show one event at a time. The store
> output will contain a Weight column which will be empty.
> 
> In V2, we updated the man pages to reflect the change and
> also simplify the initialization of the argv vector passed
> to the cmd_*() functions as per LKML feedback.
> 
> In V3, we fixed typos in the changelog.
> 
> Signed-off-by: Stephane Eranian <eranian@google.com>

Acked-by: Namhyung Kim <namhyung@kernel.org>

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tip:perf/core] perf mem: Enable sampling loads and stores simultaneously
  2014-12-17 15:23 [PATCH v3] perf mem: enable sampling loads and stores simultaneously Stephane Eranian
  2014-12-17 16:07 ` Arnaldo Carvalho de Melo
  2014-12-18  4:52 ` Namhyung Kim
@ 2015-01-28 15:03 ` tip-bot for Stephane Eranian
  2 siblings, 0 replies; 6+ messages in thread
From: tip-bot for Stephane Eranian @ 2015-01-28 15:03 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: namhyung, dzickus, mingo, acme, linux-kernel, jmario, mingo,
	eranian, rfowles, ak, jolsa, tglx, peterz, hpa, dsahern

Commit-ID:  67121f85e464d66596f99afd8d188c1ae892f8fb
Gitweb:     http://git.kernel.org/tip/67121f85e464d66596f99afd8d188c1ae892f8fb
Author:     Stephane Eranian <eranian@google.com>
AuthorDate: Wed, 17 Dec 2014 16:23:55 +0100
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Wed, 21 Jan 2015 13:24:31 -0300

perf mem: Enable sampling loads and stores simultaneously

This patch modifies perf mem to default to sampling loads and stores
simultaneously. It could only do one or the other before yet there was
no hardware restriction preventing simultaneous collection. With this
patch, one run is sufficient to collect both.

It is still possible to sample only loads or stores by using the
-t option:
 $ perf mem -t load rec
 $ perf mem -t load rep
Or
 $ perf mem -t store rec
 $ perf mem -t store rep

The perf report TUI will show one event at a time. The store output will
contain a Weight column which will be empty.

In V2, we updated the man pages to reflect the change and also simplify
the initialization of the argv vector passed to the cmd_*() functions as
per LKML feedback.

In V3, we fixed typos in the changelog.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Fowles <rfowles@redhat.com>
Link: http://lkml.kernel.org/r/20141217152355.GA10053@thinkpad
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-mem.txt |   9 +--
 tools/perf/builtin-mem.c              | 123 +++++++++++++++++++++++++++-------
 2 files changed, 105 insertions(+), 27 deletions(-)

diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 1d78a40..43310d8 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -12,11 +12,12 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-"perf mem -t <TYPE> record" runs a command and gathers memory operation data
+"perf mem record" runs a command and gathers memory operation data
 from it, into perf.data. Perf record options are accepted and are passed through.
 
-"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
-right set of options to display a memory access profile.
+"perf mem report" displays the result. It invokes perf report with the
+right set of options to display a memory access profile. By default, loads
+and stores are sampled. Use the -t option to limit to loads or stores.
 
 Note that on Intel systems the memory latency reported is the use-latency,
 not the pure load (or store latency). Use latency includes any pipeline
@@ -29,7 +30,7 @@ OPTIONS
 
 -t::
 --type=::
-	Select the memory operation type: load or store (default: load)
+	Select the memory operation type: load or store (default: load,store)
 
 -D::
 --dump-raw-samples=::
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 24db6ff..1eded0a 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -7,10 +7,13 @@
 #include "util/session.h"
 #include "util/data.h"
 
-#define MEM_OPERATION_LOAD	"load"
-#define MEM_OPERATION_STORE	"store"
+#define MEM_OPERATION_LOAD	0x1
+#define MEM_OPERATION_STORE	0x2
 
-static const char	*mem_operation		= MEM_OPERATION_LOAD;
+/*
+ * default to both load and store sampling
+ */
+static int mem_operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE;
 
 struct perf_mem {
 	struct perf_tool	tool;
@@ -25,26 +28,30 @@ static int __cmd_record(int argc, const char **argv)
 {
 	int rec_argc, i = 0, j;
 	const char **rec_argv;
-	char event[64];
 	int ret;
 
-	rec_argc = argc + 4;
+	rec_argc = argc + 7; /* max number of arguments */
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 	if (!rec_argv)
 		return -1;
 
-	rec_argv[i++] = strdup("record");
-	if (!strcmp(mem_operation, MEM_OPERATION_LOAD))
-		rec_argv[i++] = strdup("-W");
-	rec_argv[i++] = strdup("-d");
-	rec_argv[i++] = strdup("-e");
+	rec_argv[i++] = "record";
 
-	if (strcmp(mem_operation, MEM_OPERATION_LOAD))
-		sprintf(event, "cpu/mem-stores/pp");
-	else
-		sprintf(event, "cpu/mem-loads/pp");
+	if (mem_operation & MEM_OPERATION_LOAD)
+		rec_argv[i++] = "-W";
+
+	rec_argv[i++] = "-d";
+
+	if (mem_operation & MEM_OPERATION_LOAD) {
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = "cpu/mem-loads/pp";
+	}
+
+	if (mem_operation & MEM_OPERATION_STORE) {
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = "cpu/mem-stores/pp";
+	}
 
-	rec_argv[i++] = strdup(event);
 	for (j = 1; j < argc; j++, i++)
 		rec_argv[i] = argv[j];
 
@@ -162,17 +169,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
 	if (!rep_argv)
 		return -1;
 
-	rep_argv[i++] = strdup("report");
-	rep_argv[i++] = strdup("--mem-mode");
-	rep_argv[i++] = strdup("-n"); /* display number of samples */
+	rep_argv[i++] = "report";
+	rep_argv[i++] = "--mem-mode";
+	rep_argv[i++] = "-n"; /* display number of samples */
 
 	/*
 	 * there is no weight (cost) associated with stores, so don't print
 	 * the column
 	 */
-	if (strcmp(mem_operation, MEM_OPERATION_LOAD))
-		rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr,"
-				       "dso_daddr,tlb,locked");
+	if (!(mem_operation & MEM_OPERATION_LOAD))
+		rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
+				"dso_daddr,tlb,locked";
 
 	for (j = 1; j < argc; j++, i++)
 		rep_argv[i] = argv[j];
@@ -182,6 +189,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
 	return ret;
 }
 
+struct mem_mode {
+	const char *name;
+	int mode;
+};
+
+#define MEM_OPT(n, m) \
+	{ .name = n, .mode = (m) }
+
+#define MEM_END { .name = NULL }
+
+static const struct mem_mode mem_modes[]={
+	MEM_OPT("load", MEM_OPERATION_LOAD),
+	MEM_OPT("store", MEM_OPERATION_STORE),
+	MEM_END
+};
+
+static int
+parse_mem_ops(const struct option *opt, const char *str, int unset)
+{
+	int *mode = (int *)opt->value;
+	const struct mem_mode *m;
+	char *s, *os = NULL, *p;
+	int ret = -1;
+
+	if (unset)
+		return 0;
+
+	/* str may be NULL in case no arg is passed to -t */
+	if (str) {
+		/* because str is read-only */
+		s = os = strdup(str);
+		if (!s)
+			return -1;
+
+		/* reset mode */
+		*mode = 0;
+
+		for (;;) {
+			p = strchr(s, ',');
+			if (p)
+				*p = '\0';
+
+			for (m = mem_modes; m->name; m++) {
+				if (!strcasecmp(s, m->name))
+					break;
+			}
+			if (!m->name) {
+				fprintf(stderr, "unknown sampling op %s,"
+					    " check man page\n", s);
+				goto error;
+			}
+
+			*mode |= m->mode;
+
+			if (!p)
+				break;
+
+			s = p + 1;
+		}
+	}
+	ret = 0;
+
+	if (*mode == 0)
+		*mode = MEM_OPERATION_LOAD;
+error:
+	free(os);
+	return ret;
+}
+
 int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	struct stat st;
@@ -199,8 +275,9 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
 		.input_name		 = "perf.data",
 	};
 	const struct option mem_options[] = {
-	OPT_STRING('t', "type", &mem_operation,
-		   "type", "memory operations(load/store)"),
+	OPT_CALLBACK('t', "type", &mem_operation,
+		   "type", "memory operations(load,store) Default load,store",
+		    parse_mem_ops),
 	OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
 		    "dump raw samples in ASCII"),
 	OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-01-28 20:50 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-17 15:23 [PATCH v3] perf mem: enable sampling loads and stores simultaneously Stephane Eranian
2014-12-17 16:07 ` Arnaldo Carvalho de Melo
2014-12-17 16:38   ` Stephane Eranian
2014-12-17 16:49     ` Arnaldo Carvalho de Melo
2014-12-18  4:52 ` Namhyung Kim
2015-01-28 15:03 ` [tip:perf/core] perf mem: Enable " tip-bot for Stephane Eranian

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).