LKML Archive on lore.kernel.org help / color / mirror / Atom feed
* [PATCH -tip 0/3] perf/bench-futex: Misc updates @ 2021-08-06 1:33 Davidlohr Bueso 2021-08-06 1:33 ` [PATCH 1/3] perf/bench-futex: Group test parameters cleanup Davidlohr Bueso ` (2 more replies) 0 siblings, 3 replies; 5+ messages in thread From: Davidlohr Bueso @ 2021-08-06 1:33 UTC (permalink / raw) To: acme; +Cc: linux-kernel, linux-perf-users, dave Hi, A couple of updates for the futex perf benchmarks. Please consider for v5.15. Thanks! Davidlohr Bueso (3): perf/bench-futex: Group test parameters cleanup perf/bench-futex: Add --mlockall parameter perf/bench-futex, requeue: Add --pi parameter tools/perf/bench/futex-hash.c | 77 ++++++++----- tools/perf/bench/futex-lock-pi.c | 75 ++++++++----- tools/perf/bench/futex-requeue.c | 148 ++++++++++++++++++------- tools/perf/bench/futex-wake-parallel.c | 78 ++++++++----- tools/perf/bench/futex-wake.c | 77 ++++++++----- tools/perf/bench/futex.h | 40 ++++++- 6 files changed, 346 insertions(+), 149 deletions(-) -- 2.26.2 ^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/3] perf/bench-futex: Group test parameters cleanup 2021-08-06 1:33 [PATCH -tip 0/3] perf/bench-futex: Misc updates Davidlohr Bueso @ 2021-08-06 1:33 ` Davidlohr Bueso 2021-08-06 18:30 ` Arnaldo Carvalho de Melo 2021-08-06 1:33 ` [PATCH 2/3] perf/bench-futex: Add --mlockall parameter Davidlohr Bueso 2021-08-06 1:33 ` [PATCH 3/3] perf/bench-futex, requeue: Add --pi parameter Davidlohr Bueso 2 siblings, 1 reply; 5+ messages in thread From: Davidlohr Bueso @ 2021-08-06 1:33 UTC (permalink / raw) To: acme; +Cc: linux-kernel, linux-perf-users, dave, Davidlohr Bueso Do this across all futex-bench tests such that all program parameters are neatly in their own structure, which is nicer than how we have them now. No changes in program behavior are expected. Signed-off-by: Davidlohr Bueso <dbueso@suse.de> --- tools/perf/bench/futex-hash.c | 68 ++++++++++++++--------- tools/perf/bench/futex-lock-pi.c | 66 +++++++++++++--------- tools/perf/bench/futex-requeue.c | 76 ++++++++++++++++---------- tools/perf/bench/futex-wake-parallel.c | 69 +++++++++++++---------- tools/perf/bench/futex-wake.c | 68 ++++++++++++++--------- 5 files changed, 210 insertions(+), 137 deletions(-) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index b65373ce5c4f..2d86602f89e2 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -29,11 +29,7 @@ #include <err.h> -static unsigned int nthreads = 0; -static unsigned int nsecs = 10; -/* amount of futexes per thread */ -static unsigned int nfutexes = 1024; -static bool fshared = false, done = false, silent = false; +static bool done = false; static int futex_flag = 0; struct timeval bench__start, bench__end, bench__runtime; @@ -49,12 +45,30 @@ struct worker { unsigned long ops; }; +struct parameters { + unsigned int nthreads; + unsigned int nfutexes; + unsigned int runtime; + bool silent; + bool fshared; +}; + +static struct parameters params = { + .nfutexes = 1024, + .runtime = 10, +}; + static 
const struct option options[] = { - OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), - OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"), - OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"), - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), - OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_UINTEGER('t', "threads", &params.nthreads, + "Specify amount of threads"), + OPT_UINTEGER('r', "runtime", &params.runtime, + "Specify runtime (in seconds)"), + OPT_UINTEGER('f', "futexes", &params.nfutexes, + "Specify amount of futexes per threads"), + OPT_BOOLEAN( 's', "silent", &params.silent, + "Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &params.fshared, + "Use shared futexes instead of private ones"), OPT_END() }; @@ -78,7 +92,7 @@ static void *workerfn(void *arg) pthread_mutex_unlock(&thread_lock); do { - for (i = 0; i < nfutexes; i++, ops++) { + for (i = 0; i < params.nfutexes; i++, ops++) { /* * We want the futex calls to fail in order to stress * the hashing of uaddr and not measure other steps, @@ -86,7 +100,7 @@ static void *workerfn(void *arg) * the critical region protected by hb->lock. */ ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag); - if (!silent && + if (!params.silent && (!ret || errno != EAGAIN || errno != EWOULDBLOCK)) warn("Non-expected futex return call"); } @@ -112,7 +126,7 @@ static void print_summary(void) double stddev = stddev_stats(&throughput_stats); printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", - !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), + !params.silent ? 
"\n" : "", avg, rel_stddev_stats(stddev, avg), (int)bench__runtime.tv_sec); } @@ -141,30 +155,30 @@ int bench_futex_hash(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - if (!nthreads) /* default to the number of CPUs */ - nthreads = cpu->nr; + if (!params.nthreads) /* default to the number of CPUs */ + params.nthreads = cpu->nr; - worker = calloc(nthreads, sizeof(*worker)); + worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) goto errmem; - if (!fshared) + if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", - getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs); + getpid(), params.nthreads, params.nfutexes, params.fshared ? "shared":"private", params.runtime); init_stats(&throughput_stats); pthread_mutex_init(&thread_lock, NULL); pthread_cond_init(&thread_parent, NULL); pthread_cond_init(&thread_worker, NULL); - threads_starting = nthreads; + threads_starting = params.nthreads; pthread_attr_init(&thread_attr); gettimeofday(&bench__start, NULL); - for (i = 0; i < nthreads; i++) { + for (i = 0; i < params.nthreads; i++) { worker[i].tid = i; - worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex)); + worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex)); if (!worker[i].futex) goto errmem; @@ -189,10 +203,10 @@ int bench_futex_hash(int argc, const char **argv) pthread_cond_broadcast(&thread_worker); pthread_mutex_unlock(&thread_lock); - sleep(nsecs); + sleep(params.runtime); toggle_done(0, NULL, NULL); - for (i = 0; i < nthreads; i++) { + for (i = 0; i < params.nthreads; i++) { ret = pthread_join(worker[i].thread, NULL); if (ret) err(EXIT_FAILURE, "pthread_join"); @@ -203,18 +217,18 @@ int bench_futex_hash(int argc, const char **argv) pthread_cond_destroy(&thread_worker); pthread_mutex_destroy(&thread_lock); - for (i = 0; i < nthreads; i++) { + for (i = 0; i < params.nthreads; i++) 
{ unsigned long t = bench__runtime.tv_sec > 0 ? worker[i].ops / bench__runtime.tv_sec : 0; update_stats(&throughput_stats, t); - if (!silent) { - if (nfutexes == 1) + if (!params.silent) { + if (params.nfutexes == 1) printf("[thread %2d] futex: %p [ %ld ops/sec ]\n", worker[i].tid, &worker[i].futex[0], t); else printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n", worker[i].tid, &worker[i].futex[0], - &worker[i].futex[nfutexes-1], t); + &worker[i].futex[params.nfutexes-1], t); } zfree(&worker[i].futex); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 89c6d160379c..9fc994beb933 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -31,22 +31,36 @@ struct worker { static u_int32_t global_futex = 0; static struct worker *worker; -static unsigned int nsecs = 10; -static bool silent = false, multi = false; -static bool done = false, fshared = false; -static unsigned int nthreads = 0; +static bool done = false; static int futex_flag = 0; static pthread_mutex_t thread_lock; static unsigned int threads_starting; static struct stats throughput_stats; static pthread_cond_t thread_parent, thread_worker; +struct parameters { + unsigned int nthreads; + unsigned int runtime; + bool multi; + bool silent; + bool fshared; +}; + +static struct parameters params = { + .runtime = 10, +}; + static const struct option options[] = { - OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), - OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"), - OPT_BOOLEAN( 'M', "multi", &multi, "Use multiple futexes"), - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), - OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_UINTEGER('t', "threads", &params.nthreads, + "Specify amount of threads"), + OPT_UINTEGER('r', "runtime", &params.runtime, + "Specify runtime (in seconds)"), + OPT_BOOLEAN( 'M', "params.multi", &params.multi, + "Use 
params.multiple futexes"), + OPT_BOOLEAN( 's', "params.silent", &params.silent, + "Params.Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &params.fshared, + "Use shared futexes instead of private ones"), OPT_END() }; @@ -61,7 +75,7 @@ static void print_summary(void) double stddev = stddev_stats(&throughput_stats); printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", - !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), + !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), (int)bench__runtime.tv_sec); } @@ -93,7 +107,7 @@ static void *workerfn(void *arg) ret = futex_lock_pi(w->futex, NULL, futex_flag); if (ret) { /* handle lock acquisition */ - if (!silent) + if (!params.silent) warn("thread %d: Could not lock pi-lock for %p (%d)", w->tid, w->futex, ret); if (done) @@ -104,7 +118,7 @@ static void *workerfn(void *arg) usleep(1); ret = futex_unlock_pi(w->futex, futex_flag); - if (ret && !silent) + if (ret && !params.silent) warn("thread %d: Could not unlock pi-lock for %p (%d)", w->tid, w->futex, ret); ops++; /* account for thread's share of work */ @@ -120,12 +134,12 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, cpu_set_t cpuset; unsigned int i; - threads_starting = nthreads; + threads_starting = params.nthreads; - for (i = 0; i < nthreads; i++) { + for (i = 0; i < params.nthreads; i++) { worker[i].tid = i; - if (multi) { + if (params.multi) { worker[i].futex = calloc(1, sizeof(u_int32_t)); if (!worker[i].futex) err(EXIT_FAILURE, "calloc"); @@ -164,25 +178,25 @@ int bench_futex_lock_pi(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - if (!nthreads) - nthreads = cpu->nr; + if (!params.nthreads) + params.nthreads = cpu->nr; - worker = calloc(nthreads, sizeof(*worker)); + worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) err(EXIT_FAILURE, "calloc"); - if (!fshared) + if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; 
printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n", - getpid(), nthreads, nsecs); + getpid(), params.nthreads, params.runtime); init_stats(&throughput_stats); pthread_mutex_init(&thread_lock, NULL); pthread_cond_init(&thread_parent, NULL); pthread_cond_init(&thread_worker, NULL); - threads_starting = nthreads; + threads_starting = params.nthreads; pthread_attr_init(&thread_attr); gettimeofday(&bench__start, NULL); @@ -195,10 +209,10 @@ int bench_futex_lock_pi(int argc, const char **argv) pthread_cond_broadcast(&thread_worker); pthread_mutex_unlock(&thread_lock); - sleep(nsecs); + sleep(params.runtime); toggle_done(0, NULL, NULL); - for (i = 0; i < nthreads; i++) { + for (i = 0; i < params.nthreads; i++) { ret = pthread_join(worker[i].thread, NULL); if (ret) err(EXIT_FAILURE, "pthread_join"); @@ -209,16 +223,16 @@ int bench_futex_lock_pi(int argc, const char **argv) pthread_cond_destroy(&thread_worker); pthread_mutex_destroy(&thread_lock); - for (i = 0; i < nthreads; i++) { + for (i = 0; i < params.nthreads; i++) { unsigned long t = bench__runtime.tv_sec > 0 ? worker[i].ops / bench__runtime.tv_sec : 0; update_stats(&throughput_stats, t); - if (!silent) + if (!params.silent) printf("[thread %3d] futex: %p [ %ld ops/sec ]\n", worker[i].tid, worker[i].futex, t); - if (multi) + if (params.multi) zfree(&worker[i].futex); } diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 5fa23295ee5f..b65761e98245 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -30,25 +30,38 @@ static u_int32_t futex1 = 0, futex2 = 0; -/* - * How many tasks to requeue at a time. - * Default to 1 in order to make the kernel work more. 
- */ -static unsigned int nrequeue = 1; - static pthread_t *worker; -static bool done = false, silent = false, fshared = false; +static bool done = false; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; static struct stats requeuetime_stats, requeued_stats; -static unsigned int threads_starting, nthreads = 0; +static unsigned int threads_starting; static int futex_flag = 0; +struct parameters { + unsigned int nthreads; + unsigned int nrequeue; + bool silent; + bool fshared; +}; + +static struct parameters params = { + /* + * How many tasks to requeue at a time. + * Default to 1 in order to make the kernel work more. + */ + .nrequeue = 1, +}; + static const struct option options[] = { - OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), - OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"), - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), - OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_UINTEGER('t', "threads", &params.nthreads, + "Specify amount of threads"), + OPT_UINTEGER('q', "nrequeue", &params.nrequeue, + "Specify amount of threads to requeue at once"), + OPT_BOOLEAN( 's', "silent", &params.silent, + "Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &params.fshared, + "Use shared futexes instead of private ones"), OPT_END() }; @@ -65,7 +78,7 @@ static void print_summary(void) printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n", requeued_avg, - nthreads, + params.nthreads, requeuetime_avg / USEC_PER_MSEC, rel_stddev_stats(requeuetime_stddev, requeuetime_avg)); } @@ -89,10 +102,10 @@ static void block_threads(pthread_t *w, cpu_set_t cpuset; unsigned int i; - threads_starting = nthreads; + threads_starting = params.nthreads; /* create and block all threads */ - for (i = 0; i < nthreads; i++) { + for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); CPU_SET(cpu->map[i % 
cpu->nr], &cpuset); @@ -132,22 +145,22 @@ int bench_futex_requeue(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - if (!nthreads) - nthreads = cpu->nr; + if (!params.nthreads) + params.nthreads = cpu->nr; - worker = calloc(nthreads, sizeof(*worker)); + worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) err(EXIT_FAILURE, "calloc"); - if (!fshared) + if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; - if (nrequeue > nthreads) - nrequeue = nthreads; + if (params.nrequeue > params.nthreads) + params.nrequeue = params.nthreads; printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), " - "%d at a time.\n\n", getpid(), nthreads, - fshared ? "shared":"private", &futex1, &futex2, nrequeue); + "%d at a time.\n\n", getpid(), params.nthreads, + params.fshared ? "shared":"private", &futex1, &futex2, params.nrequeue); init_stats(&requeued_stats); init_stats(&requeuetime_stats); @@ -174,13 +187,14 @@ int bench_futex_requeue(int argc, const char **argv) /* Ok, all threads are patiently blocked, start requeueing */ gettimeofday(&start, NULL); - while (nrequeued < nthreads) { + while (nrequeued < params.nthreads) { /* * Do not wakeup any tasks blocked on futex1, allowing * us to really measure futex_wait functionality. 
*/ nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0, - nrequeue, futex_flag); + params.nrequeue, + futex_flag); } gettimeofday(&end, NULL); @@ -189,17 +203,19 @@ int bench_futex_requeue(int argc, const char **argv) update_stats(&requeued_stats, nrequeued); update_stats(&requeuetime_stats, runtime.tv_usec); - if (!silent) { + if (!params.silent) { printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n", - j + 1, nrequeued, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC); + j + 1, nrequeued, params.nthreads, + runtime.tv_usec / (double)USEC_PER_MSEC); } /* everybody should be blocked on futex2, wake'em up */ nrequeued = futex_wake(&futex2, nrequeued, futex_flag); - if (nthreads != nrequeued) - warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads); + if (params.nthreads != nrequeued) + warnx("couldn't wakeup all tasks (%d/%d)", + nrequeued, params.nthreads); - for (i = 0; i < nthreads; i++) { + for (i = 0; i < params.nthreads; i++) { ret = pthread_join(worker[i], NULL); if (ret) err(EXIT_FAILURE, "pthread_join"); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 6e6f5247e1fe..a80dfff5fe37 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -47,8 +47,7 @@ static unsigned int nwakes = 1; static u_int32_t futex = 0; static pthread_t *blocked_worker; -static bool done = false, silent = false, fshared = false; -static unsigned int nblocked_threads = 0, nwaking_threads = 0; +static bool done = false; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; static pthread_barrier_t barrier; @@ -56,11 +55,24 @@ static struct stats waketime_stats, wakeup_stats; static unsigned int threads_starting; static int futex_flag = 0; +struct parameters { + unsigned int nwaking_threads; + unsigned int nblocked_threads; + bool silent; + bool fshared; +}; + +static struct parameters params; + static const struct option options[] = { - 
OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"), - OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"), - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), - OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_UINTEGER('t', "threads", &params.nblocked_threads, + "Specify amount of threads"), + OPT_UINTEGER('w', "nwakers", &params.nwaking_threads, + "Specify amount of waking threads"), + OPT_BOOLEAN( 's', "silent", &params.silent, + "Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &params.fshared, + "Use shared futexes instead of private ones"), OPT_END() }; @@ -96,10 +108,10 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr) pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); - pthread_barrier_init(&barrier, NULL, nwaking_threads + 1); + pthread_barrier_init(&barrier, NULL, params.nwaking_threads + 1); /* create and block all threads */ - for (i = 0; i < nwaking_threads; i++) { + for (i = 0; i < params.nwaking_threads; i++) { /* * Thread creation order will impact per-thread latency * as it will affect the order to acquire the hb spinlock. 
@@ -112,7 +124,7 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr) pthread_barrier_wait(&barrier); - for (i = 0; i < nwaking_threads; i++) + for (i = 0; i < params.nwaking_threads; i++) if (pthread_join(td[i].worker, NULL)) err(EXIT_FAILURE, "pthread_join"); @@ -143,10 +155,10 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr, cpu_set_t cpuset; unsigned int i; - threads_starting = nblocked_threads; + threads_starting = params.nblocked_threads; /* create and block all threads */ - for (i = 0; i < nblocked_threads; i++) { + for (i = 0; i < params.nblocked_threads; i++) { CPU_ZERO(&cpuset); CPU_SET(cpu->map[i % cpu->nr], &cpuset); @@ -167,7 +179,7 @@ static void print_run(struct thread_data *waking_worker, unsigned int run_num) init_stats(&__wakeup_stats); init_stats(&__waketime_stats); - for (i = 0; i < nwaking_threads; i++) { + for (i = 0; i < params.nwaking_threads; i++) { update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec); update_stats(&__wakeup_stats, waking_worker[i].nwoken); } @@ -178,7 +190,7 @@ static void print_run(struct thread_data *waking_worker, unsigned int run_num) printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) " "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg, - nblocked_threads, waketime_avg / USEC_PER_MSEC, + params.nblocked_threads, waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); } @@ -193,7 +205,7 @@ static void print_summary(void) printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n", wakeup_avg, - nblocked_threads, + params.nblocked_threads, waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); } @@ -203,7 +215,7 @@ static void do_run_stats(struct thread_data *waking_worker) { unsigned int i; - for (i = 0; i < nwaking_threads; i++) { + for (i = 0; i < params.nwaking_threads; i++) { update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec); update_stats(&wakeup_stats, 
waking_worker[i].nwoken); } @@ -242,32 +254,33 @@ int bench_futex_wake_parallel(int argc, const char **argv) if (!cpu) err(EXIT_FAILURE, "calloc"); - if (!nblocked_threads) - nblocked_threads = cpu->nr; + if (!params.nblocked_threads) + params.nblocked_threads = cpu->nr; /* some sanity checks */ - if (nwaking_threads > nblocked_threads || !nwaking_threads) - nwaking_threads = nblocked_threads; + if (params.nwaking_threads > params.nblocked_threads || + !params.nwaking_threads) + params.nwaking_threads = params.nblocked_threads; - if (nblocked_threads % nwaking_threads) + if (params.nblocked_threads % params.nwaking_threads) errx(EXIT_FAILURE, "Must be perfectly divisible"); /* * Each thread will wakeup nwakes tasks in * a single futex_wait call. */ - nwakes = nblocked_threads/nwaking_threads; + nwakes = params.nblocked_threads/params.nwaking_threads; - blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker)); + blocked_worker = calloc(params.nblocked_threads, sizeof(*blocked_worker)); if (!blocked_worker) err(EXIT_FAILURE, "calloc"); - if (!fshared) + if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; printf("Run summary [PID %d]: blocking on %d threads (at [%s] " "futex %p), %d threads waking up %d at a time.\n\n", - getpid(), nblocked_threads, fshared ? "shared":"private", - &futex, nwaking_threads, nwakes); + getpid(), params.nblocked_threads, params.fshared ? 
"shared":"private", + &futex, params.nwaking_threads, nwakes); init_stats(&wakeup_stats); init_stats(&waketime_stats); @@ -278,7 +291,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) pthread_cond_init(&thread_worker, NULL); for (j = 0; j < bench_repeat && !done; j++) { - waking_worker = calloc(nwaking_threads, sizeof(*waking_worker)); + waking_worker = calloc(params.nwaking_threads, sizeof(*waking_worker)); if (!waking_worker) err(EXIT_FAILURE, "calloc"); @@ -297,14 +310,14 @@ int bench_futex_wake_parallel(int argc, const char **argv) /* Ok, all threads are patiently blocked, start waking folks up */ wakeup_threads(waking_worker, thread_attr); - for (i = 0; i < nblocked_threads; i++) { + for (i = 0; i < params.nblocked_threads; i++) { ret = pthread_join(blocked_worker[i], NULL); if (ret) err(EXIT_FAILURE, "pthread_join"); } do_run_stats(waking_worker); - if (!silent) + if (!params.silent) print_run(waking_worker, j); free(waking_worker); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 6d217868f53c..22763774ede7 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -31,25 +31,38 @@ /* all threads will block on the same futex */ static u_int32_t futex1 = 0; -/* - * How many wakeups to do at a time. - * Default to 1 in order to make the kernel work more. - */ -static unsigned int nwakes = 1; - -pthread_t *worker; -static bool done = false, silent = false, fshared = false; +static pthread_t *worker; +static bool done = false; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; static struct stats waketime_stats, wakeup_stats; -static unsigned int threads_starting, nthreads = 0; +static unsigned int threads_starting; static int futex_flag = 0; +struct parameters { + unsigned int nthreads; + unsigned int nwakes; + bool silent; + bool fshared; +}; + +static struct parameters params = { + /* + * How many wakeups to do at a time. 
+ * Default to 1 in order to make the kernel work more. + */ + .nwakes = 1, +}; + static const struct option options[] = { - OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), - OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"), - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), - OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_UINTEGER('t', "threads", &params.nthreads, + "Specify amount of threads"), + OPT_UINTEGER('w', "nwakes", &params.nwakes, + "Specify amount of threads to wake at once"), + OPT_BOOLEAN( 's', "silent", &params.silent, + "Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &params.fshared, + "Use shared futexes instead of private ones"), OPT_END() }; @@ -84,7 +97,7 @@ static void print_summary(void) printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n", wakeup_avg, - nthreads, + params.nthreads, waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); } @@ -95,10 +108,10 @@ static void block_threads(pthread_t *w, cpu_set_t cpuset; unsigned int i; - threads_starting = nthreads; + threads_starting = params.nthreads; /* create and block all threads */ - for (i = 0; i < nthreads; i++) { + for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); CPU_SET(cpu->map[i % cpu->nr], &cpuset); @@ -140,19 +153,20 @@ int bench_futex_wake(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - if (!nthreads) - nthreads = cpu->nr; + if (!params.nthreads) + params.nthreads = cpu->nr; - worker = calloc(nthreads, sizeof(*worker)); + worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) err(EXIT_FAILURE, "calloc"); - if (!fshared) + if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), " "waking up %d at a time.\n\n", - getpid(), nthreads, fshared ? 
"shared":"private", &futex1, nwakes); + getpid(), params.nthreads, params.fshared ? "shared":"private", + &futex1, params.nwakes); init_stats(&wakeup_stats); init_stats(&waketime_stats); @@ -179,20 +193,22 @@ int bench_futex_wake(int argc, const char **argv) /* Ok, all threads are patiently blocked, start waking folks up */ gettimeofday(&start, NULL); - while (nwoken != nthreads) - nwoken += futex_wake(&futex1, nwakes, futex_flag); + while (nwoken != params.nthreads) + nwoken += futex_wake(&futex1, + params.nwakes, futex_flag); gettimeofday(&end, NULL); timersub(&end, &start, &runtime); update_stats(&wakeup_stats, nwoken); update_stats(&waketime_stats, runtime.tv_usec); - if (!silent) { + if (!params.silent) { printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n", - j + 1, nwoken, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC); + j + 1, nwoken, params.nthreads, + runtime.tv_usec / (double)USEC_PER_MSEC); } - for (i = 0; i < nthreads; i++) { + for (i = 0; i < params.nthreads; i++) { ret = pthread_join(worker[i], NULL); if (ret) err(EXIT_FAILURE, "pthread_join"); -- 2.26.2 ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/3] perf/bench-futex: Group test parameters cleanup 2021-08-06 1:33 ` [PATCH 1/3] perf/bench-futex: Group test parameters cleanup Davidlohr Bueso @ 2021-08-06 18:30 ` Arnaldo Carvalho de Melo 0 siblings, 0 replies; 5+ messages in thread From: Arnaldo Carvalho de Melo @ 2021-08-06 18:30 UTC (permalink / raw) To: Davidlohr Bueso; +Cc: linux-kernel, linux-perf-users, Davidlohr Bueso Em Thu, Aug 05, 2021 at 06:33:27PM -0700, Davidlohr Bueso escreveu: > Do this across all futex-bench tests such that all program parameters > are neatly in their own structure, which is nicer than how we have > them now. No changes in program behavior are expected. > > Signed-off-by: Davidlohr Bueso <dbueso@suse.de> > --- > tools/perf/bench/futex-hash.c | 68 ++++++++++++++--------- > tools/perf/bench/futex-lock-pi.c | 66 +++++++++++++--------- > tools/perf/bench/futex-requeue.c | 76 ++++++++++++++++---------- > tools/perf/bench/futex-wake-parallel.c | 69 +++++++++++++---------- > tools/perf/bench/futex-wake.c | 68 ++++++++++++++--------- > 5 files changed, 210 insertions(+), 137 deletions(-) > > diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c > index b65373ce5c4f..2d86602f89e2 100644 > --- a/tools/perf/bench/futex-hash.c > +++ b/tools/perf/bench/futex-hash.c > @@ -29,11 +29,7 @@ > > #include <err.h> > > -static unsigned int nthreads = 0; > -static unsigned int nsecs = 10; > -/* amount of futexes per thread */ > -static unsigned int nfutexes = 1024; > -static bool fshared = false, done = false, silent = false; > +static bool done = false; > static int futex_flag = 0; > > struct timeval bench__start, bench__end, bench__runtime; > @@ -49,12 +45,30 @@ struct worker { > unsigned long ops; > }; > > +struct parameters { > + unsigned int nthreads; > + unsigned int nfutexes; tab please > + unsigned int runtime; > + bool silent; > + bool fshared; > +}; Also aren't all these 'struct parameter' definitions almost the same? 
Why not have it in a .h file shared by all these futex tests? Also consider renaming it to something less generic so that things like ctags stay happy, bench_futex_parms perhaps? > +static struct parameters params = { > + .nfutexes = 1024, > + .runtime = 10, > +}; > + > static const struct option options[] = { > - OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), > - OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"), > - OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"), > - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), > - OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), > + OPT_UINTEGER('t', "threads", &params.nthreads, > + "Specify amount of threads"), No need to break down these lines into multiple, keeping them as one-liners should help reviewing a bit. > + OPT_UINTEGER('r', "runtime", &params.runtime, > + "Specify runtime (in seconds)"), > + OPT_UINTEGER('f', "futexes", &params.nfutexes, > + "Specify amount of futexes per threads"), > + OPT_BOOLEAN( 's', "silent", &params.silent, > + "Silent mode: do not display data/details"), > + OPT_BOOLEAN( 'S', "shared", &params.fshared, > + "Use shared futexes instead of private ones"), > OPT_END() > }; > > @@ -78,7 +92,7 @@ static void *workerfn(void *arg) > pthread_mutex_unlock(&thread_lock); > > do { > - for (i = 0; i < nfutexes; i++, ops++) { > + for (i = 0; i < params.nfutexes; i++, ops++) { > /* > * We want the futex calls to fail in order to stress > * the hashing of uaddr and not measure other steps, > @@ -86,7 +100,7 @@ static void *workerfn(void *arg) > * the critical region protected by hb->lock. 
> */ > ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag); > - if (!silent && > + if (!params.silent && > (!ret || errno != EAGAIN || errno != EWOULDBLOCK)) > warn("Non-expected futex return call"); > } > @@ -112,7 +126,7 @@ static void print_summary(void) > double stddev = stddev_stats(&throughput_stats); > > printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", > - !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), > + !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), > (int)bench__runtime.tv_sec); > } > > @@ -141,30 +155,30 @@ int bench_futex_hash(int argc, const char **argv) > act.sa_sigaction = toggle_done; > sigaction(SIGINT, &act, NULL); > > - if (!nthreads) /* default to the number of CPUs */ > - nthreads = cpu->nr; > + if (!params.nthreads) /* default to the number of CPUs */ > + params.nthreads = cpu->nr; > > - worker = calloc(nthreads, sizeof(*worker)); > + worker = calloc(params.nthreads, sizeof(*worker)); > if (!worker) > goto errmem; > > - if (!fshared) > + if (!params.fshared) > futex_flag = FUTEX_PRIVATE_FLAG; > > printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", > - getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs); > + getpid(), params.nthreads, params.nfutexes, params.fshared ? 
"shared":"private", params.runtime); > > init_stats(&throughput_stats); > pthread_mutex_init(&thread_lock, NULL); > pthread_cond_init(&thread_parent, NULL); > pthread_cond_init(&thread_worker, NULL); > > - threads_starting = nthreads; > + threads_starting = params.nthreads; > pthread_attr_init(&thread_attr); > gettimeofday(&bench__start, NULL); > - for (i = 0; i < nthreads; i++) { > + for (i = 0; i < params.nthreads; i++) { > worker[i].tid = i; > - worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex)); > + worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex)); > if (!worker[i].futex) > goto errmem; > > @@ -189,10 +203,10 @@ int bench_futex_hash(int argc, const char **argv) > pthread_cond_broadcast(&thread_worker); > pthread_mutex_unlock(&thread_lock); > > - sleep(nsecs); > + sleep(params.runtime); > toggle_done(0, NULL, NULL); > > - for (i = 0; i < nthreads; i++) { > + for (i = 0; i < params.nthreads; i++) { > ret = pthread_join(worker[i].thread, NULL); > if (ret) > err(EXIT_FAILURE, "pthread_join"); > @@ -203,18 +217,18 @@ int bench_futex_hash(int argc, const char **argv) > pthread_cond_destroy(&thread_worker); > pthread_mutex_destroy(&thread_lock); > > - for (i = 0; i < nthreads; i++) { > + for (i = 0; i < params.nthreads; i++) { > unsigned long t = bench__runtime.tv_sec > 0 ? > worker[i].ops / bench__runtime.tv_sec : 0; > update_stats(&throughput_stats, t); > - if (!silent) { > - if (nfutexes == 1) > + if (!params.silent) { > + if (params.nfutexes == 1) > printf("[thread %2d] futex: %p [ %ld ops/sec ]\n", > worker[i].tid, &worker[i].futex[0], t); > else > printf("[thread %2d] futexes: %p ... 
%p [ %ld ops/sec ]\n", > worker[i].tid, &worker[i].futex[0], > - &worker[i].futex[nfutexes-1], t); > + &worker[i].futex[params.nfutexes-1], t); > } > > zfree(&worker[i].futex); > diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c > index 89c6d160379c..9fc994beb933 100644 > --- a/tools/perf/bench/futex-lock-pi.c > +++ b/tools/perf/bench/futex-lock-pi.c > @@ -31,22 +31,36 @@ struct worker { > > static u_int32_t global_futex = 0; > static struct worker *worker; > -static unsigned int nsecs = 10; > -static bool silent = false, multi = false; > -static bool done = false, fshared = false; > -static unsigned int nthreads = 0; > +static bool done = false; > static int futex_flag = 0; > static pthread_mutex_t thread_lock; > static unsigned int threads_starting; > static struct stats throughput_stats; > static pthread_cond_t thread_parent, thread_worker; > > +struct parameters { > + unsigned int nthreads; > + unsigned int runtime; > + bool multi; > + bool silent; > + bool fshared; > +}; > + > +static struct parameters params = { > + .runtime = 10, > +}; > + > static const struct option options[] = { > - OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), > - OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"), > - OPT_BOOLEAN( 'M', "multi", &multi, "Use multiple futexes"), > - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), > - OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), > + OPT_UINTEGER('t', "threads", &params.nthreads, > + "Specify amount of threads"), > + OPT_UINTEGER('r', "runtime", &params.runtime, > + "Specify runtime (in seconds)"), > + OPT_BOOLEAN( 'M', "params.multi", &params.multi, > + "Use params.multiple futexes"), > + OPT_BOOLEAN( 's', "params.silent", &params.silent, > + "Params.Silent mode: do not display data/details"), > + OPT_BOOLEAN( 'S', "shared", &params.fshared, > + "Use shared futexes instead of private ones"), > OPT_END() > }; > > @@ 
-61,7 +75,7 @@ static void print_summary(void) > double stddev = stddev_stats(&throughput_stats); > > printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", > - !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), > + !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), > (int)bench__runtime.tv_sec); > } > > @@ -93,7 +107,7 @@ static void *workerfn(void *arg) > ret = futex_lock_pi(w->futex, NULL, futex_flag); > > if (ret) { /* handle lock acquisition */ > - if (!silent) > + if (!params.silent) > warn("thread %d: Could not lock pi-lock for %p (%d)", > w->tid, w->futex, ret); > if (done) > @@ -104,7 +118,7 @@ static void *workerfn(void *arg) > > usleep(1); > ret = futex_unlock_pi(w->futex, futex_flag); > - if (ret && !silent) > + if (ret && !params.silent) > warn("thread %d: Could not unlock pi-lock for %p (%d)", > w->tid, w->futex, ret); > ops++; /* account for thread's share of work */ > @@ -120,12 +134,12 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, > cpu_set_t cpuset; > unsigned int i; > > - threads_starting = nthreads; > + threads_starting = params.nthreads; > > - for (i = 0; i < nthreads; i++) { > + for (i = 0; i < params.nthreads; i++) { > worker[i].tid = i; > > - if (multi) { > + if (params.multi) { > worker[i].futex = calloc(1, sizeof(u_int32_t)); > if (!worker[i].futex) > err(EXIT_FAILURE, "calloc"); > @@ -164,25 +178,25 @@ int bench_futex_lock_pi(int argc, const char **argv) > act.sa_sigaction = toggle_done; > sigaction(SIGINT, &act, NULL); > > - if (!nthreads) > - nthreads = cpu->nr; > + if (!params.nthreads) > + params.nthreads = cpu->nr; > > - worker = calloc(nthreads, sizeof(*worker)); > + worker = calloc(params.nthreads, sizeof(*worker)); > if (!worker) > err(EXIT_FAILURE, "calloc"); > > - if (!fshared) > + if (!params.fshared) > futex_flag = FUTEX_PRIVATE_FLAG; > > printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n", > - getpid(), nthreads, nsecs); > + 
getpid(), params.nthreads, params.runtime); > > init_stats(&throughput_stats); > pthread_mutex_init(&thread_lock, NULL); > pthread_cond_init(&thread_parent, NULL); > pthread_cond_init(&thread_worker, NULL); > > - threads_starting = nthreads; > + threads_starting = params.nthreads; > pthread_attr_init(&thread_attr); > gettimeofday(&bench__start, NULL); > > @@ -195,10 +209,10 @@ int bench_futex_lock_pi(int argc, const char **argv) > pthread_cond_broadcast(&thread_worker); > pthread_mutex_unlock(&thread_lock); > > - sleep(nsecs); > + sleep(params.runtime); > toggle_done(0, NULL, NULL); > > - for (i = 0; i < nthreads; i++) { > + for (i = 0; i < params.nthreads; i++) { > ret = pthread_join(worker[i].thread, NULL); > if (ret) > err(EXIT_FAILURE, "pthread_join"); > @@ -209,16 +223,16 @@ int bench_futex_lock_pi(int argc, const char **argv) > pthread_cond_destroy(&thread_worker); > pthread_mutex_destroy(&thread_lock); > > - for (i = 0; i < nthreads; i++) { > + for (i = 0; i < params.nthreads; i++) { > unsigned long t = bench__runtime.tv_sec > 0 ? > worker[i].ops / bench__runtime.tv_sec : 0; > > update_stats(&throughput_stats, t); > - if (!silent) > + if (!params.silent) > printf("[thread %3d] futex: %p [ %ld ops/sec ]\n", > worker[i].tid, worker[i].futex, t); > > - if (multi) > + if (params.multi) > zfree(&worker[i].futex); > } > > diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c > index 5fa23295ee5f..b65761e98245 100644 > --- a/tools/perf/bench/futex-requeue.c > +++ b/tools/perf/bench/futex-requeue.c > @@ -30,25 +30,38 @@ > > static u_int32_t futex1 = 0, futex2 = 0; > > -/* > - * How many tasks to requeue at a time. > - * Default to 1 in order to make the kernel work more. 
> - */ > -static unsigned int nrequeue = 1; > - > static pthread_t *worker; > -static bool done = false, silent = false, fshared = false; > +static bool done = false; > static pthread_mutex_t thread_lock; > static pthread_cond_t thread_parent, thread_worker; > static struct stats requeuetime_stats, requeued_stats; > -static unsigned int threads_starting, nthreads = 0; > +static unsigned int threads_starting; > static int futex_flag = 0; > > +struct parameters { > + unsigned int nthreads; > + unsigned int nrequeue; > + bool silent; > + bool fshared; > +}; > + > +static struct parameters params = { > + /* > + * How many tasks to requeue at a time. > + * Default to 1 in order to make the kernel work more. > + */ > + .nrequeue = 1, > +}; > + > static const struct option options[] = { > - OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), > - OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"), > - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), > - OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), > + OPT_UINTEGER('t', "threads", &params.nthreads, > + "Specify amount of threads"), > + OPT_UINTEGER('q', "nrequeue", &params.nrequeue, > + "Specify amount of threads to requeue at once"), > + OPT_BOOLEAN( 's', "silent", &params.silent, > + "Silent mode: do not display data/details"), > + OPT_BOOLEAN( 'S', "shared", &params.fshared, > + "Use shared futexes instead of private ones"), > OPT_END() > }; > > @@ -65,7 +78,7 @@ static void print_summary(void) > > printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n", > requeued_avg, > - nthreads, > + params.nthreads, > requeuetime_avg / USEC_PER_MSEC, > rel_stddev_stats(requeuetime_stddev, requeuetime_avg)); > } > @@ -89,10 +102,10 @@ static void block_threads(pthread_t *w, > cpu_set_t cpuset; > unsigned int i; > > - threads_starting = nthreads; > + threads_starting = params.nthreads; > > /* create and block all threads 
*/ > - for (i = 0; i < nthreads; i++) { > + for (i = 0; i < params.nthreads; i++) { > CPU_ZERO(&cpuset); > CPU_SET(cpu->map[i % cpu->nr], &cpuset); > > @@ -132,22 +145,22 @@ int bench_futex_requeue(int argc, const char **argv) > act.sa_sigaction = toggle_done; > sigaction(SIGINT, &act, NULL); > > - if (!nthreads) > - nthreads = cpu->nr; > + if (!params.nthreads) > + params.nthreads = cpu->nr; > > - worker = calloc(nthreads, sizeof(*worker)); > + worker = calloc(params.nthreads, sizeof(*worker)); > if (!worker) > err(EXIT_FAILURE, "calloc"); > > - if (!fshared) > + if (!params.fshared) > futex_flag = FUTEX_PRIVATE_FLAG; > > - if (nrequeue > nthreads) > - nrequeue = nthreads; > + if (params.nrequeue > params.nthreads) > + params.nrequeue = params.nthreads; > > printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), " > - "%d at a time.\n\n", getpid(), nthreads, > - fshared ? "shared":"private", &futex1, &futex2, nrequeue); > + "%d at a time.\n\n", getpid(), params.nthreads, > + params.fshared ? "shared":"private", &futex1, &futex2, params.nrequeue); > > init_stats(&requeued_stats); > init_stats(&requeuetime_stats); > @@ -174,13 +187,14 @@ int bench_futex_requeue(int argc, const char **argv) > > /* Ok, all threads are patiently blocked, start requeueing */ > gettimeofday(&start, NULL); > - while (nrequeued < nthreads) { > + while (nrequeued < params.nthreads) { > /* > * Do not wakeup any tasks blocked on futex1, allowing > * us to really measure futex_wait functionality. 
> */ > nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0, > - nrequeue, futex_flag); > + params.nrequeue, > + futex_flag); > } > > gettimeofday(&end, NULL); > @@ -189,17 +203,19 @@ int bench_futex_requeue(int argc, const char **argv) > update_stats(&requeued_stats, nrequeued); > update_stats(&requeuetime_stats, runtime.tv_usec); > > - if (!silent) { > + if (!params.silent) { > printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n", > - j + 1, nrequeued, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC); > + j + 1, nrequeued, params.nthreads, > + runtime.tv_usec / (double)USEC_PER_MSEC); > } > > /* everybody should be blocked on futex2, wake'em up */ > nrequeued = futex_wake(&futex2, nrequeued, futex_flag); > - if (nthreads != nrequeued) > - warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads); > + if (params.nthreads != nrequeued) > + warnx("couldn't wakeup all tasks (%d/%d)", > + nrequeued, params.nthreads); > > - for (i = 0; i < nthreads; i++) { > + for (i = 0; i < params.nthreads; i++) { > ret = pthread_join(worker[i], NULL); > if (ret) > err(EXIT_FAILURE, "pthread_join"); > diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c > index 6e6f5247e1fe..a80dfff5fe37 100644 > --- a/tools/perf/bench/futex-wake-parallel.c > +++ b/tools/perf/bench/futex-wake-parallel.c > @@ -47,8 +47,7 @@ static unsigned int nwakes = 1; > static u_int32_t futex = 0; > > static pthread_t *blocked_worker; > -static bool done = false, silent = false, fshared = false; > -static unsigned int nblocked_threads = 0, nwaking_threads = 0; > +static bool done = false; > static pthread_mutex_t thread_lock; > static pthread_cond_t thread_parent, thread_worker; > static pthread_barrier_t barrier; > @@ -56,11 +55,24 @@ static struct stats waketime_stats, wakeup_stats; > static unsigned int threads_starting; > static int futex_flag = 0; > > +struct parameters { > + unsigned int nwaking_threads; > + unsigned int nblocked_threads; > + bool silent; 
> + bool fshared; > +}; > + > +static struct parameters params; > + > static const struct option options[] = { > - OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"), > - OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"), > - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), > - OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), > + OPT_UINTEGER('t', "threads", &params.nblocked_threads, > + "Specify amount of threads"), > + OPT_UINTEGER('w', "nwakers", &params.nwaking_threads, > + "Specify amount of waking threads"), > + OPT_BOOLEAN( 's', "silent", &params.silent, > + "Silent mode: do not display data/details"), > + OPT_BOOLEAN( 'S', "shared", &params.fshared, > + "Use shared futexes instead of private ones"), > OPT_END() > }; > > @@ -96,10 +108,10 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr) > > pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); > > - pthread_barrier_init(&barrier, NULL, nwaking_threads + 1); > + pthread_barrier_init(&barrier, NULL, params.nwaking_threads + 1); > > /* create and block all threads */ > - for (i = 0; i < nwaking_threads; i++) { > + for (i = 0; i < params.nwaking_threads; i++) { > /* > * Thread creation order will impact per-thread latency > * as it will affect the order to acquire the hb spinlock. 
> @@ -112,7 +124,7 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr) > > pthread_barrier_wait(&barrier); > > - for (i = 0; i < nwaking_threads; i++) > + for (i = 0; i < params.nwaking_threads; i++) > if (pthread_join(td[i].worker, NULL)) > err(EXIT_FAILURE, "pthread_join"); > > @@ -143,10 +155,10 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr, > cpu_set_t cpuset; > unsigned int i; > > - threads_starting = nblocked_threads; > + threads_starting = params.nblocked_threads; > > /* create and block all threads */ > - for (i = 0; i < nblocked_threads; i++) { > + for (i = 0; i < params.nblocked_threads; i++) { > CPU_ZERO(&cpuset); > CPU_SET(cpu->map[i % cpu->nr], &cpuset); > > @@ -167,7 +179,7 @@ static void print_run(struct thread_data *waking_worker, unsigned int run_num) > init_stats(&__wakeup_stats); > init_stats(&__waketime_stats); > > - for (i = 0; i < nwaking_threads; i++) { > + for (i = 0; i < params.nwaking_threads; i++) { > update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec); > update_stats(&__wakeup_stats, waking_worker[i].nwoken); > } > @@ -178,7 +190,7 @@ static void print_run(struct thread_data *waking_worker, unsigned int run_num) > > printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) " > "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg, > - nblocked_threads, waketime_avg / USEC_PER_MSEC, > + params.nblocked_threads, waketime_avg / USEC_PER_MSEC, > rel_stddev_stats(waketime_stddev, waketime_avg)); > } > > @@ -193,7 +205,7 @@ static void print_summary(void) > > printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n", > wakeup_avg, > - nblocked_threads, > + params.nblocked_threads, > waketime_avg / USEC_PER_MSEC, > rel_stddev_stats(waketime_stddev, waketime_avg)); > } > @@ -203,7 +215,7 @@ static void do_run_stats(struct thread_data *waking_worker) > { > unsigned int i; > > - for (i = 0; i < nwaking_threads; i++) { > + for (i = 0; i < 
params.nwaking_threads; i++) { > update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec); > update_stats(&wakeup_stats, waking_worker[i].nwoken); > } > @@ -242,32 +254,33 @@ int bench_futex_wake_parallel(int argc, const char **argv) > if (!cpu) > err(EXIT_FAILURE, "calloc"); > > - if (!nblocked_threads) > - nblocked_threads = cpu->nr; > + if (!params.nblocked_threads) > + params.nblocked_threads = cpu->nr; > > /* some sanity checks */ > - if (nwaking_threads > nblocked_threads || !nwaking_threads) > - nwaking_threads = nblocked_threads; > + if (params.nwaking_threads > params.nblocked_threads || > + !params.nwaking_threads) > + params.nwaking_threads = params.nblocked_threads; > > - if (nblocked_threads % nwaking_threads) > + if (params.nblocked_threads % params.nwaking_threads) > errx(EXIT_FAILURE, "Must be perfectly divisible"); > /* > * Each thread will wakeup nwakes tasks in > * a single futex_wait call. > */ > - nwakes = nblocked_threads/nwaking_threads; > + nwakes = params.nblocked_threads/params.nwaking_threads; > > - blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker)); > + blocked_worker = calloc(params.nblocked_threads, sizeof(*blocked_worker)); > if (!blocked_worker) > err(EXIT_FAILURE, "calloc"); > > - if (!fshared) > + if (!params.fshared) > futex_flag = FUTEX_PRIVATE_FLAG; > > printf("Run summary [PID %d]: blocking on %d threads (at [%s] " > "futex %p), %d threads waking up %d at a time.\n\n", > - getpid(), nblocked_threads, fshared ? "shared":"private", > - &futex, nwaking_threads, nwakes); > + getpid(), params.nblocked_threads, params.fshared ? 
"shared":"private", > + &futex, params.nwaking_threads, nwakes); > > init_stats(&wakeup_stats); > init_stats(&waketime_stats); > @@ -278,7 +291,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) > pthread_cond_init(&thread_worker, NULL); > > for (j = 0; j < bench_repeat && !done; j++) { > - waking_worker = calloc(nwaking_threads, sizeof(*waking_worker)); > + waking_worker = calloc(params.nwaking_threads, sizeof(*waking_worker)); > if (!waking_worker) > err(EXIT_FAILURE, "calloc"); > > @@ -297,14 +310,14 @@ int bench_futex_wake_parallel(int argc, const char **argv) > /* Ok, all threads are patiently blocked, start waking folks up */ > wakeup_threads(waking_worker, thread_attr); > > - for (i = 0; i < nblocked_threads; i++) { > + for (i = 0; i < params.nblocked_threads; i++) { > ret = pthread_join(blocked_worker[i], NULL); > if (ret) > err(EXIT_FAILURE, "pthread_join"); > } > > do_run_stats(waking_worker); > - if (!silent) > + if (!params.silent) > print_run(waking_worker, j); > > free(waking_worker); > diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c > index 6d217868f53c..22763774ede7 100644 > --- a/tools/perf/bench/futex-wake.c > +++ b/tools/perf/bench/futex-wake.c > @@ -31,25 +31,38 @@ > /* all threads will block on the same futex */ > static u_int32_t futex1 = 0; > > -/* > - * How many wakeups to do at a time. > - * Default to 1 in order to make the kernel work more. 
> - */ > -static unsigned int nwakes = 1; > - > -pthread_t *worker; > -static bool done = false, silent = false, fshared = false; > +static pthread_t *worker; > +static bool done = false; > static pthread_mutex_t thread_lock; > static pthread_cond_t thread_parent, thread_worker; > static struct stats waketime_stats, wakeup_stats; > -static unsigned int threads_starting, nthreads = 0; > +static unsigned int threads_starting; > static int futex_flag = 0; > > +struct parameters { > + unsigned int nthreads; > + unsigned int nwakes; > + bool silent; > + bool fshared; > +}; > + > +static struct parameters params = { > + /* > + * How many wakeups to do at a time. > + * Default to 1 in order to make the kernel work more. > + */ > + .nwakes = 1, > +}; > + > static const struct option options[] = { > - OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), > - OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"), > - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), > - OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), > + OPT_UINTEGER('t', "threads", &params.nthreads, > + "Specify amount of threads"), > + OPT_UINTEGER('w', "nwakes", &params.nwakes, > + "Specify amount of threads to wake at once"), > + OPT_BOOLEAN( 's', "silent", &params.silent, > + "Silent mode: do not display data/details"), > + OPT_BOOLEAN( 'S', "shared", &params.fshared, > + "Use shared futexes instead of private ones"), > OPT_END() > }; > > @@ -84,7 +97,7 @@ static void print_summary(void) > > printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n", > wakeup_avg, > - nthreads, > + params.nthreads, > waketime_avg / USEC_PER_MSEC, > rel_stddev_stats(waketime_stddev, waketime_avg)); > } > @@ -95,10 +108,10 @@ static void block_threads(pthread_t *w, > cpu_set_t cpuset; > unsigned int i; > > - threads_starting = nthreads; > + threads_starting = params.nthreads; > > /* create and block all threads */ > - for (i = 
0; i < nthreads; i++) { > + for (i = 0; i < params.nthreads; i++) { > CPU_ZERO(&cpuset); > CPU_SET(cpu->map[i % cpu->nr], &cpuset); > > @@ -140,19 +153,20 @@ int bench_futex_wake(int argc, const char **argv) > act.sa_sigaction = toggle_done; > sigaction(SIGINT, &act, NULL); > > - if (!nthreads) > - nthreads = cpu->nr; > + if (!params.nthreads) > + params.nthreads = cpu->nr; > > - worker = calloc(nthreads, sizeof(*worker)); > + worker = calloc(params.nthreads, sizeof(*worker)); > if (!worker) > err(EXIT_FAILURE, "calloc"); > > - if (!fshared) > + if (!params.fshared) > futex_flag = FUTEX_PRIVATE_FLAG; > > printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), " > "waking up %d at a time.\n\n", > - getpid(), nthreads, fshared ? "shared":"private", &futex1, nwakes); > + getpid(), params.nthreads, params.fshared ? "shared":"private", > + &futex1, params.nwakes); > > init_stats(&wakeup_stats); > init_stats(&waketime_stats); > @@ -179,20 +193,22 @@ int bench_futex_wake(int argc, const char **argv) > > /* Ok, all threads are patiently blocked, start waking folks up */ > gettimeofday(&start, NULL); > - while (nwoken != nthreads) > - nwoken += futex_wake(&futex1, nwakes, futex_flag); > + while (nwoken != params.nthreads) > + nwoken += futex_wake(&futex1, > + params.nwakes, futex_flag); > gettimeofday(&end, NULL); > timersub(&end, &start, &runtime); > > update_stats(&wakeup_stats, nwoken); > update_stats(&waketime_stats, runtime.tv_usec); > > - if (!silent) { > + if (!params.silent) { > printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n", > - j + 1, nwoken, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC); > + j + 1, nwoken, params.nthreads, > + runtime.tv_usec / (double)USEC_PER_MSEC); > } > > - for (i = 0; i < nthreads; i++) { > + for (i = 0; i < params.nthreads; i++) { > ret = pthread_join(worker[i], NULL); > if (ret) > err(EXIT_FAILURE, "pthread_join"); > -- > 2.26.2 > -- - Arnaldo ^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 2/3] perf/bench-futex: Add --mlockall parameter 2021-08-06 1:33 [PATCH -tip 0/3] perf/bench-futex: Misc updates Davidlohr Bueso 2021-08-06 1:33 ` [PATCH 1/3] perf/bench-futex: Group test parameters cleanup Davidlohr Bueso @ 2021-08-06 1:33 ` Davidlohr Bueso 2021-08-06 1:33 ` [PATCH 3/3] perf/bench-futex, requeue: Add --pi parameter Davidlohr Bueso 2 siblings, 0 replies; 5+ messages in thread From: Davidlohr Bueso @ 2021-08-06 1:33 UTC (permalink / raw) To: acme; +Cc: linux-kernel, linux-perf-users, dave, Davidlohr Bueso This adds, across all futex benchmarks, the -m/--mlockall option which is a common operation for realtime workloads by not incurring in page faults in paths that are deterministic. As such, threads started after a call to mlockall(2) will generate page faults immediately since the new stack is immediately forced to memory, due to the MCL_FUTURE flag. Signed-off-by: Davidlohr Bueso <dbueso@suse.de> --- tools/perf/bench/futex-hash.c | 9 +++++++++ tools/perf/bench/futex-lock-pi.c | 9 +++++++++ tools/perf/bench/futex-requeue.c | 9 +++++++++ tools/perf/bench/futex-wake-parallel.c | 9 +++++++++ tools/perf/bench/futex-wake.c | 9 +++++++++ 5 files changed, 45 insertions(+) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 2d86602f89e2..ba8ceff4e098 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -20,6 +20,7 @@ #include <linux/kernel.h> #include <linux/zalloc.h> #include <sys/time.h> +#include <sys/mman.h> #include <perf/cpumap.h> #include "../util/stat.h" @@ -51,6 +52,7 @@ struct parameters { unsigned int runtime; bool silent; bool fshared; + bool mlockall; }; static struct parameters params = { @@ -69,6 +71,8 @@ static const struct option options[] = { "Silent mode: do not display data/details"), OPT_BOOLEAN( 'S', "shared", &params.fshared, "Use shared futexes instead of private ones"), + OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, + "Lock all current and future memory"), OPT_END() }; @@ 
-155,6 +159,11 @@ int bench_futex_hash(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); + if (params.mlockall) { + if (mlockall(MCL_CURRENT | MCL_FUTURE)) + err(EXIT_FAILURE, "mlockall"); + } + if (!params.nthreads) /* default to the number of CPUs */ params.nthreads = cpu->nr; diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 9fc994beb933..ab387e73839e 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -21,6 +21,7 @@ #include <err.h> #include <stdlib.h> #include <sys/time.h> +#include <sys/mman.h> struct worker { int tid; @@ -44,6 +45,7 @@ struct parameters { bool multi; bool silent; bool fshared; + bool mlockall; }; static struct parameters params = { @@ -61,6 +63,8 @@ static const struct option options[] = { "Params.Silent mode: do not display data/details"), OPT_BOOLEAN( 'S', "shared", &params.fshared, "Use shared futexes instead of private ones"), + OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, + "Lock all current and future memory"), OPT_END() }; @@ -178,6 +182,11 @@ int bench_futex_lock_pi(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); + if (params.mlockall) { + if (mlockall(MCL_CURRENT | MCL_FUTURE)) + err(EXIT_FAILURE, "mlockall"); + } + if (!params.nthreads) params.nthreads = cpu->nr; diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index b65761e98245..51f2c0d5e6f8 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -27,6 +27,7 @@ #include <err.h> #include <stdlib.h> #include <sys/time.h> +#include <sys/mman.h> static u_int32_t futex1 = 0, futex2 = 0; @@ -43,6 +44,7 @@ struct parameters { unsigned int nrequeue; bool silent; bool fshared; + bool mlockall; }; static struct parameters params = { @@ -62,6 +64,8 @@ static const struct option options[] = { "Silent mode: do not display data/details"), OPT_BOOLEAN( 'S', "shared", &params.fshared, 
"Use shared futexes instead of private ones"), + OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, + "Lock all current and future memory"), OPT_END() }; @@ -145,6 +149,11 @@ int bench_futex_requeue(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); + if (params.mlockall) { + if (mlockall(MCL_CURRENT | MCL_FUTURE)) + err(EXIT_FAILURE, "mlockall"); + } + if (!params.nthreads) params.nthreads = cpu->nr; diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index a80dfff5fe37..d6d71374f535 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -34,6 +34,7 @@ int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe #include <err.h> #include <stdlib.h> #include <sys/time.h> +#include <sys/mman.h> struct thread_data { pthread_t worker; @@ -60,6 +61,7 @@ struct parameters { unsigned int nblocked_threads; bool silent; bool fshared; + bool mlockall; }; static struct parameters params; @@ -73,6 +75,8 @@ static const struct option options[] = { "Silent mode: do not display data/details"), OPT_BOOLEAN( 'S', "shared", &params.fshared, "Use shared futexes instead of private ones"), + OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, + "Lock all current and future memory"), OPT_END() }; @@ -250,6 +254,11 @@ int bench_futex_wake_parallel(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); + if (params.mlockall) { + if (mlockall(MCL_CURRENT | MCL_FUTURE)) + err(EXIT_FAILURE, "mlockall"); + } + cpu = perf_cpu_map__new(NULL); if (!cpu) err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 22763774ede7..66f1338d66bc 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -27,6 +27,7 @@ #include <err.h> #include <stdlib.h> #include <sys/time.h> +#include <sys/mman.h> /* all threads will block on the same futex */ static u_int32_t futex1 
= 0; @@ -44,6 +45,7 @@ struct parameters { unsigned int nwakes; bool silent; bool fshared; + bool mlockall; }; static struct parameters params = { @@ -63,6 +65,8 @@ static const struct option options[] = { "Silent mode: do not display data/details"), OPT_BOOLEAN( 'S', "shared", &params.fshared, "Use shared futexes instead of private ones"), + OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, + "Lock all current and future memory"), OPT_END() }; @@ -153,6 +157,11 @@ int bench_futex_wake(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); + if (params.mlockall) { + if (mlockall(MCL_CURRENT | MCL_FUTURE)) + err(EXIT_FAILURE, "mlockall"); + } + if (!params.nthreads) params.nthreads = cpu->nr; -- 2.26.2 ^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 3/3] perf/bench-futex, requeue: Add --pi parameter 2021-08-06 1:33 [PATCH -tip 0/3] perf/bench-futex: Misc updates Davidlohr Bueso 2021-08-06 1:33 ` [PATCH 1/3] perf/bench-futex: Group test parameters cleanup Davidlohr Bueso 2021-08-06 1:33 ` [PATCH 2/3] perf/bench-futex: Add --mlockall parameter Davidlohr Bueso @ 2021-08-06 1:33 ` Davidlohr Bueso 2 siblings, 0 replies; 5+ messages in thread From: Davidlohr Bueso @ 2021-08-06 1:33 UTC (permalink / raw) To: acme; +Cc: linux-kernel, linux-perf-users, dave, Davidlohr Bueso This extends the program to measure WAIT_REQUEUE_PI+CMP_REQUEUE_PI pairs, which are the underlying machinery behind priority-inheritance aware condition variables. The defaults are the same as with the regular non-pi version, requeueing one task at a time, with the exception that PI will always wakeup the first waiter. Signed-off-by: Davidlohr Bueso <dbueso@suse.de> --- tools/perf/bench/futex-requeue.c | 81 +++++++++++++++++++++++++------- tools/perf/bench/futex.h | 40 ++++++++++++++-- 2 files changed, 100 insertions(+), 21 deletions(-) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 51f2c0d5e6f8..fcbf04235d94 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -6,7 +6,8 @@ * on futex2, N at a time. * * This program is particularly useful to measure the latency of nthread - * requeues without waking up any tasks -- thus mimicking a regular futex_wait. + * requeues without waking up any tasks (in the non-pi case) -- thus + * mimicking a regular futex_wait. 
*/ /* For the CLR_() macros */ @@ -41,7 +42,8 @@ static int futex_flag = 0; struct parameters { unsigned int nthreads; - unsigned int nrequeue; + unsigned int nrequeue; + bool pi; bool silent; bool fshared; bool mlockall; @@ -60,6 +62,8 @@ static const struct option options[] = { "Specify amount of threads"), OPT_UINTEGER('q', "nrequeue", &params.nrequeue, "Specify amount of threads to requeue at once"), + OPT_BOOLEAN( 'p', "pi", &params.pi, + "Use PI-aware variants of FUTEX_CMP_REQUEUE"), OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"), OPT_BOOLEAN( 'S', "shared", &params.fshared, @@ -96,7 +100,18 @@ static void *workerfn(void *arg __maybe_unused) pthread_cond_wait(&thread_worker, &thread_lock); pthread_mutex_unlock(&thread_lock); - futex_wait(&futex1, 0, NULL, futex_flag); + if (!params.pi) + futex_wait(&futex1, 0, NULL, futex_flag); + else { + int ret = futex_wait_requeue_pi(&futex1, 0, &futex2, + NULL, futex_flag); + if (ret) + warnx("waiter was not requeued"); + else + futex_unlock_pi(&futex2, futex_flag); + } + + return NULL; } @@ -167,9 +182,10 @@ int bench_futex_requeue(int argc, const char **argv) if (params.nrequeue > params.nthreads) params.nrequeue = params.nthreads; - printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), " + printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), " "%d at a time.\n\n", getpid(), params.nthreads, - params.fshared ? "shared":"private", &futex1, &futex2, params.nrequeue); + params.fshared ? "shared":"private", &futex1, + params.pi ? 
"PI ": "", &futex2, params.nrequeue); init_stats(&requeued_stats); init_stats(&requeuetime_stats); @@ -179,7 +195,7 @@ int bench_futex_requeue(int argc, const char **argv) pthread_cond_init(&thread_worker, NULL); for (j = 0; j < bench_repeat && !done; j++) { - unsigned int nrequeued = 0; + unsigned int nrequeued = 0, wakeups = 0; struct timeval start, end, runtime; /* create, launch & block all threads */ @@ -197,13 +213,30 @@ int bench_futex_requeue(int argc, const char **argv) /* Ok, all threads are patiently blocked, start requeueing */ gettimeofday(&start, NULL); while (nrequeued < params.nthreads) { + int r; + /* - * Do not wakeup any tasks blocked on futex1, allowing - * us to really measure futex_wait functionality. + * For the regular non-pi case, do not wakeup any tasks + * blocked on futex1, allowing us to really measure + * futex_wait functionality. For the PI case the first + * waiter is always awoken. */ - nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0, - params.nrequeue, - futex_flag); + if (!params.pi) { + r = futex_cmp_requeue(&futex1, 0, &futex2, 0, + params.nrequeue, + futex_flag); + } else { + r = futex_cmp_requeue_pi(&futex1, 0, &futex2, + params.nrequeue, + futex_flag); + wakeups++; /* assume no error */ + } + + if (r < 0) + err(EXIT_FAILURE, "couldn't requeue from %p to %p", + &futex1, &futex2); + + nrequeued += r; } gettimeofday(&end, NULL); @@ -213,16 +246,28 @@ int bench_futex_requeue(int argc, const char **argv) update_stats(&requeuetime_stats, runtime.tv_usec); if (!params.silent) { - printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n", - j + 1, nrequeued, params.nthreads, - runtime.tv_usec / (double)USEC_PER_MSEC); + if (!params.pi) + printf("[Run %d]: Requeued %d of %d threads in " + "%.4f ms\n", j + 1, nrequeued, + params.nthreads, + runtime.tv_usec / (double)USEC_PER_MSEC); + else { + nrequeued -= wakeups; + printf("[Run %d]: Awoke and Requeued (%d+%d) of " + "%d threads in %.4f ms\n", + j + 1, wakeups, nrequeued, + 
params.nthreads, + runtime.tv_usec / (double)USEC_PER_MSEC); + } } /* everybody should be blocked on futex2, wake'em up */ - nrequeued = futex_wake(&futex2, nrequeued, futex_flag); - if (params.nthreads != nrequeued) - warnx("couldn't wakeup all tasks (%d/%d)", - nrequeued, params.nthreads); + if (!params.pi) { + nrequeued = futex_wake(&futex2, nrequeued, futex_flag); + if (params.nthreads != nrequeued) + err(EXIT_FAILURE, "couldn't wakeup all tasks (%d/%d)", + nrequeued, params.nthreads); + } for (i = 0; i < params.nthreads; i++) { ret = pthread_join(worker[i], NULL); diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 31b53cc7d5bc..f64a3c231184 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -77,13 +77,47 @@ futex_unlock_pi(u_int32_t *uaddr, int opflags) /** * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 * @nr_wake: wake up to this many tasks -* @nr_requeue: requeue up to this many tasks +* @nr_requeue: requeue up to this many tasks */ static inline int -futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake, - int nr_requeue, int opflags) +futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, + int nr_wake, int nr_requeue, int opflags) { return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, val, opflags); } + +/** + * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2 + * @uaddr: non-PI futex source + * @uaddr2: PI futex target + * + * This is the first half of the requeue_pi mechanism. It shall always be + * paired with futex_cmp_requeue_pi(). 
+ */ +static inline int +futex_wait_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, + struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0, + opflags); +} + +/** + * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2 + * @uaddr: non-PI futex source + * @uaddr2: PI futex target + * @nr_requeue: requeue up to this many tasks + * + * This is the second half of the requeue_pi mechanism. It shall always be + * paired with futex_wait_requeue_pi(). The first waiter is always awoken. + */ +static inline int +futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, + int nr_requeue, int opflags) +{ + return futex(uaddr, FUTEX_CMP_REQUEUE_PI, 1, nr_requeue, uaddr2, + val, opflags); +} + #endif /* _FUTEX_H */ -- 2.26.2 ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2021-08-06 18:30 UTC | newest] Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2021-08-06 1:33 [PATCH -tip 0/3] perf/bench-futex: Misc updates Davidlohr Bueso 2021-08-06 1:33 ` [PATCH 1/3] perf/bench-futex: Group test parameters cleanup Davidlohr Bueso 2021-08-06 18:30 ` Arnaldo Carvalho de Melo 2021-08-06 1:33 ` [PATCH 2/3] perf/bench-futex: Add --mlockall parameter Davidlohr Bueso 2021-08-06 1:33 ` [PATCH 3/3] perf/bench-futex, requeue: Add --pi parameter Davidlohr Bueso
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).