diff options
author | Len Brown <len.brown@intel.com> | 2013-04-17 19:25:05 -0400 |
---|---|---|
committer | Len Brown <len.brown@intel.com> | 2013-04-17 19:25:05 -0400 |
commit | 5c99726b5faaf1ba4ca80c39d319f419c227f803 (patch) | |
tree | 35f7a3c77fd04a2003967afea7fd947597d59695 /tools | |
parent | 86239ceb33b0d8480b0f0ca0eec08e7f7a807374 (diff) | |
parent | 231ebb3b15380466ef91fda0cf33150acc3e03a6 (diff) |
Merge branch 'fspin' into next
Diffstat (limited to 'tools')
-rw-r--r-- | tools/lib/traceevent/Makefile | 2 | ||||
-rw-r--r-- | tools/perf/Makefile | 8 | ||||
-rw-r--r-- | tools/perf/bench/bench.h | 24 | ||||
-rw-r--r-- | tools/perf/builtin-record.c | 6 | ||||
-rw-r--r-- | tools/perf/util/hist.h | 5 | ||||
-rw-r--r-- | tools/perf/util/strlist.c | 2 | ||||
-rw-r--r-- | tools/power/fspin/Makefile | 21 | ||||
-rw-r--r-- | tools/power/fspin/fspin.1 | 68 | ||||
-rw-r--r-- | tools/power/fspin/fspin.c | 443 |
9 files changed, 572 insertions, 7 deletions
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index a20e32033431..0b0a90787db6 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -122,7 +122,7 @@ export Q VERBOSE EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) -INCLUDES = -I. -I/usr/local/include $(CONFIG_INCLUDES) +INCLUDES = -I. $(CONFIG_INCLUDES) # Set compile option CFLAGS if not set elsewhere CFLAGS ?= -g -Wall diff --git a/tools/perf/Makefile b/tools/perf/Makefile index a2108ca1cc17..bb74c79cd16e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -95,7 +95,7 @@ ifeq ("$(origin DEBUG)", "command line") PERF_DEBUG = $(DEBUG) endif ifndef PERF_DEBUG - CFLAGS_OPTIMIZE = -O6 -D_FORTIFY_SOURCE=2 + CFLAGS_OPTIMIZE = -O6 endif ifdef PARSER_DEBUG @@ -180,6 +180,12 @@ ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wvolatile-register-var,-W CFLAGS := $(CFLAGS) -Wvolatile-register-var endif +ifndef PERF_DEBUG + ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -D_FORTIFY_SOURCE=2,-D_FORTIFY_SOURCE=2),y) + CFLAGS := $(CFLAGS) -D_FORTIFY_SOURCE=2 + endif +endif + ### --- END CONFIGURATION SECTION --- ifeq ($(srctree),) diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index a5223e6a7b43..0fdc85269c4d 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -1,6 +1,30 @@ #ifndef BENCH_H #define BENCH_H +/* + * The madvise transparent hugepage constants were added in glibc + * 2.13. For compatibility with older versions of glibc, define these + * tokens if they are not already defined. + * + * PA-RISC uses different madvise values from other architectures and + * needs to be special-cased. 
+ */ +#ifdef __hppa__ +# ifndef MADV_HUGEPAGE +# define MADV_HUGEPAGE 67 +# endif +# ifndef MADV_NOHUGEPAGE +# define MADV_NOHUGEPAGE 68 +# endif +#else +# ifndef MADV_HUGEPAGE +# define MADV_HUGEPAGE 14 +# endif +# ifndef MADV_NOHUGEPAGE +# define MADV_NOHUGEPAGE 15 +# endif +#endif + extern int bench_numa(int argc, const char **argv, const char *prefix); extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 774c90713a53..f1a939ebc19c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -573,13 +573,15 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) perf_event__synthesize_guest_os, tool); } - if (!opts->target.system_wide) + if (perf_target__has_task(&opts->target)) err = perf_event__synthesize_thread_map(tool, evsel_list->threads, process_synthesized_event, machine); - else + else if (perf_target__has_cpu(&opts->target)) err = perf_event__synthesize_threads(tool, process_synthesized_event, machine); + else /* command specified */ + err = 0; if (err != 0) goto out_delete_session; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 38624686ee9a..226a4ae2f936 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -208,8 +208,9 @@ static inline int script_browse(const char *script_opt __maybe_unused) return 0; } -#define K_LEFT -1 -#define K_RIGHT -2 +#define K_LEFT -1000 +#define K_RIGHT -2000 +#define K_SWITCH_INPUT_DATA -3000 #endif #ifdef GTK2_SUPPORT diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 55433aa42c8f..eabdce0a2daa 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -143,7 +143,7 @@ struct strlist *strlist__new(bool dupstr, const char *list) slist->rblist.node_delete = strlist__node_delete; slist->dupstr = dupstr; - if (slist && 
strlist__parse_list(slist, list) != 0) + if (list && strlist__parse_list(slist, list) != 0) goto out_error; } diff --git a/tools/power/fspin/Makefile b/tools/power/fspin/Makefile new file mode 100644 index 000000000000..527400782943 --- /dev/null +++ b/tools/power/fspin/Makefile @@ -0,0 +1,21 @@ +CC = $(CROSS_COMPILE)gcc +BUILD_OUTPUT := $(PWD) +PREFIX := /usr +DESTDIR := + +fspin : fspin.c +CFLAGS += -Wall + +%: %.c + @mkdir -p $(BUILD_OUTPUT) + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ -lpthread + +.PHONY : clean +clean : + @rm -f $(BUILD_OUTPUT)/fspin + +install : fspin + install -d $(DESTDIR)$(PREFIX)/bin + install $(BUILD_OUTPUT)/fspin $(DESTDIR)$(PREFIX)/bin/fspin + install -d $(DESTDIR)$(PREFIX)/share/man/man1 + install fspin.1 $(DESTDIR)$(PREFIX)/share/man/man1 diff --git a/tools/power/fspin/fspin.1 b/tools/power/fspin/fspin.1 new file mode 100644 index 000000000000..b57308e03b82 --- /dev/null +++ b/tools/power/fspin/fspin.1 @@ -0,0 +1,68 @@ +.\" This page Copyright (C) 2013 Len Brown <len.brown@intel.com> +.\" Distributed under the GPL, Copyleft 1994. +.TH FSPIN 1 +.SH NAME +fspin \- simple workload for power experiments +.SH SYNOPSIS +.ft B +.B fspin +.RB [ "\-v" ] +.RB [ "\-i iterations" ] +.RB [ "\-s sec_per_iteration" ] +.RB [ "\-t threads" ] +.RB [ "\-b bind_to_cpus" ] +.RB [ "\-m memory (b|k|m)" ] +.br +.SH DESCRIPTION +\fBfspin\fP +heats up the hardware by running a +floating-point spin loop per processor. +Every +.I sec_per_iteration +seconds, fspin presents the sum of the work completed +by all threads. +.SS Options +.PP +\fB-v\fP increases verbosity. +By default, fspin prints only the quantity of work completed. +.PP +\fB-s sec_per_iteration\fP +Print the indicator of work completed every +sec_per_iteration seconds. By default, 5 sec. +.PP +\fB-t threads\fP +Create +.I threads +software threads. Default is number of +logical processors available, or if '-b' option is used, +one thread per bound processor. 
+.PP +\fB-b bind_to_cpus\fP +Bind the threads to the indicated list of comma-separated CPU numbers. +A range of CPUs can be specified by using '-'. +.PP +\fB-i iterations\fP +Exit after +.I iterations +and print the total of work completed. +Default is to continue running forever, printing work per iteration/sec. +.PP +\fB-m memory\fP +Allocate arrays of +.I memory_size, +which is followed by a modifier b|k|m, for bytes, kilobytes, or megabytes, +respectively. Default is 512 bytes, which will spin in-cache. +Increase this number to exercise larger caches and memory. + +.SH WHAT FSPIN IS NOT +Fspin is just a simple tool, +and has not been characterized as a +.I performance benchmark. +Fspin is not a +.I power virus for cooling HW design, +as there are better tools, specialized for that purpose. + +.PP +.SH AUTHORS +.nf +Written by Len Brown <len.brown@intel.com> diff --git a/tools/power/fspin/fspin.c b/tools/power/fspin/fspin.c new file mode 100644 index 000000000000..38288c1d61bb --- /dev/null +++ b/tools/power/fspin/fspin.c @@ -0,0 +1,443 @@ +/* + * fspin.c - user utility to burn CPU cycles, thrash the cache and memory + * + * Copyright (c) 2013, Intel Corporation. + * Len Brown <len.brown@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ + +/* + * Creates one thread per logical processor (override with -t). + * Threads run on any processor (override with -b). + * Each thread allocates and initializes its own data. + * Then it processes the data using an infinite DAXPY loop: + * Double precision Y[i] = A*X[i] + Y[i] + * + * The parent thread wakes up every reporting interval, + * (override 5 sec default with -s), + * sums up and prints aggregate performance. + * + * The actual computation is somewhat arbitrary, if not random. 
+ * The performance number is intended only to be compared to itself + * on the same machine, to illustrate how various power limiting + * techniques impact performance. + */ +#define _GNU_SOURCE +#include <pthread.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <ctype.h> +#include <time.h> +#include <sched.h> +#include <errno.h> +#include <sys/time.h> + +#define BANNER "fspin v1.1, April 7, 2013 - Len Brown <len.brown@intel.com>" + +#define handle_error_en(en, msg) \ + do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0) + +#define handle_error(msg) \ + do { perror(msg); exit(EXIT_FAILURE); } while (0) + +struct thread_info { /* Used as argument to spin_loop() */ + pthread_t thread_id; /* ID returned by pthread_create() */ + int thread_num; /* Application-defined thread # */ +}; + +struct padded { + double counter; /* 8 bytes */ + double pad[(32 - 1)]; /* round up to 256 byte line */ +} *thread_data; + +int num_threads; +int thread_num_override; +int data_bytes = 512; +int nrcpus = 64; +int sec_per_interval = 5; /* seconds */ +int iterations; +int verbose; +int do_binding; + +cpu_set_t *cpu_affinity_set; +size_t cpu_affinity_setsize; + +void +allocate_cpusets() +{ + /* + * Allocate and initialize cpu_affinity_set + */ + cpu_affinity_set = CPU_ALLOC(nrcpus); + if (cpu_affinity_set == NULL) { + perror("CPU_ALLOC"); + exit(3); + } + cpu_affinity_setsize = CPU_ALLOC_SIZE(nrcpus); + CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); +} + +void +bind_to_cpus() +{ + if (!do_binding) + return; + + if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) { + fprintf(stderr, "bind_to_cpus() failed\n"); + perror("sched_setaffinity"); + exit(-1); + } +} + +int get_num_cpus() +{ + cpu_set_t *mask; + size_t size; + int num_cpus; + +realloc: + mask = CPU_ALLOC(nrcpus); + size = CPU_ALLOC_SIZE(nrcpus); + CPU_ZERO_S(size, mask); + if (sched_getaffinity(0, size, mask) == -1) { + 
CPU_FREE(mask); + if (errno == EINVAL && + nrcpus < (1024 << 8)) { + nrcpus = nrcpus << 2; + goto realloc; + } + perror("sched_getaffinity"); + return -1; + } + + num_cpus = CPU_COUNT_S(size, mask); + + CPU_FREE(mask); + + return num_cpus; +} + +static void *spin_loop(void *arg) +{ + struct thread_info *tinfo = (struct thread_info *)arg; + double *x, *y; + int i = 0; + int data_entries = data_bytes / sizeof(double); + unsigned long long bitmask = random(); + + + x = malloc(data_bytes); + y = malloc(data_bytes); + + if (x == NULL || y == NULL) { + perror("malloc"); + exit(-1); + } + + /* + * seed data array with random bits + */ + for (i = 0; i < data_entries; ++i) { + x[i] = 1.0 + i * bitmask; + y[i] = 1.0 + i * bitmask; + } + + for (i = 0; ; i++) { + + double a = 3.1415926535 * i; + + y[i] = a * x[i] + y[i]; /* DAXPY */ + + thread_data[tinfo->thread_num].counter++; + + if (i >= data_entries) + i = 0; + } + /* not reached */ +} + +void usage() +{ + fprintf(stderr, + "Usage: fspin [-v][-s sec_per_iteration][-i iterations][-t num_threads][-b cpu_list][-m memory(b|k|m)]\n"); + fprintf(stderr, "\twhere 'cpu_list' is comma and dash separated numbers with no spaces\n"); + exit(EXIT_FAILURE); +} + +void parse_error(char *string, char c) +{ + fprintf(stderr, "parse error on '%s' at '%c'\n", string, c); + usage(); +} + +int add_cpu_to_bind_mask(int cpu) { + static int num_added; + + /* check if cpu is valid */ + if (cpu < 0 || cpu > nrcpus) { + fprintf(stderr, "invalid cpu %d\n", cpu); + exit(1); + } + + if (CPU_ISSET_S(cpu, cpu_affinity_setsize, cpu_affinity_set)) { + fprintf(stderr, "can't bind to cpu %d more than once\n", cpu); + exit(1); + } + + /* add cpu to set */ + CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); + + if (verbose) + printf("%d, ", cpu); + + num_added += 1; + + return num_added; +} + + +int +parse_bind_cpu_list(char *cpu_list) +{ + char *p; + int range_next = -1; + int total_cpus_added = 0; + + allocate_cpusets(); + + for(p = cpu_list; *p != 
'\0'; ) { + int num, retval; + + /* remaining list must start w/ valid cpu number */ + + if (!isdigit(*p)) + parse_error(p, *p); + + retval = sscanf(p, "%u", &num); + if (retval == EOF) + usage(); + else if (retval == 0) + parse_error(p, *p); + + if (range_next >= 0) { + if (num <= range_next) /* range must be low to high */ + parse_error(p, *p); + + for ( ; range_next < num; range_next++) + total_cpus_added = add_cpu_to_bind_mask(range_next); + + range_next = -1; + } + + total_cpus_added = add_cpu_to_bind_mask(num); + + while (isdigit(*p)) + p++; + + switch (*p) { + case ',': + p++; + continue; + case '-': + range_next = num + 1; + p++; + continue; + } + + } + return total_cpus_added; +} + +int parse_memory_param(char *p) +{ + int bytes; + char units; + + if (2 != sscanf(p, "%d%c", &bytes, &units)) { + fprintf(stderr, "failed to parse -m\n"); + usage(); + } + switch (units) { + case 'b': + case 'B': + break; + case 'k': + case 'K': + bytes *= 1024; + break; + case 'm': + case 'M': + bytes *= 1024*1024; + break; + case 'g': + case 'G': + bytes *= 1024*1024*1024; + break; + default: + fprintf(stderr, "-m: bad memory units, use b, k, m, g\n"); + + } + return bytes; + +} + +void parse_args(int argc, char *argv[]) +{ + int opt; + + nrcpus = get_num_cpus(); + + while ((opt = getopt(argc, argv, "s:i:t:b:m:v")) != -1) { + switch (opt) { + case 's': + sec_per_interval = atoi(optarg); + if (verbose) + printf("sec_per_interval %d\n", sec_per_interval); + break; + case 'i': + iterations = atoi(optarg); + if (verbose) + printf("iterations %d\n", iterations); + break; + case 't': + thread_num_override = atoi(optarg); + if (verbose) + printf("Thread Count Override: %d\n", thread_num_override); + break; + case 'b': + do_binding = parse_bind_cpu_list(optarg); + if (verbose) + printf("Binding to %d CPUs.\n", do_binding); + break; + case 'm': + data_bytes = parse_memory_param(optarg); + if (verbose) + printf("Memory Override: %d\n", data_bytes); + break; + case 'v': + verbose++; + 
break; + default: /* '?' */ + usage(); /* does not return */ + } + } +} + +unsigned long long lsum_old; + + +struct thread_info *tinfo; +pthread_attr_t attr; + +void create_threads() +{ + int s, tnum; + + if (thread_num_override) + num_threads = thread_num_override; + else if (do_binding) + num_threads = do_binding; + else + num_threads = nrcpus; + + thread_data = calloc(num_threads, sizeof(struct padded)); + if (thread_data == NULL) + handle_error("calloc"); + + /* Initialize thread creation attributes */ + + s = pthread_attr_init(&attr); + if (s != 0) + handle_error_en(s, "pthread_attr_init"); + + /* Allocate memory for pthread_create() arguments */ + + tinfo = calloc(num_threads, sizeof(struct thread_info)); + if (tinfo == NULL) + handle_error("calloc"); + + for (tnum = 0; tnum < num_threads; tnum++) { + tinfo[tnum].thread_num = tnum; + + /* The pthread_create() call stores the thread ID into + * corresponding element of tinfo[] + */ + + s = pthread_create(&tinfo[tnum].thread_id, &attr, + &spin_loop, &tinfo[tnum]); + if (s != 0) + handle_error_en(s, "pthread_create"); + } + printf("%d threads created\n", num_threads); + return; +} + + +void monitor_threads() +{ + struct timespec ts; + struct timeval tv_old, tv_new, tv_delta; + int i, j; + double interval_float; + unsigned long long lsum; + + ts.tv_sec = sec_per_interval; + ts.tv_nsec = 0; + gettimeofday(&tv_old, (struct timezone *)NULL); + + for (i = 0; iterations ? 
i < iterations : 1 ; i++) { + + if (nanosleep(&ts, NULL) != 0) { + perror("nanosleep"); + exit(-1); + } + + for (j = 0, lsum = 0; j < num_threads; ++j) + lsum += thread_data[j].counter; + + gettimeofday(&tv_new, NULL); + timersub(&tv_new, &tv_old, &tv_delta); + + interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; + printf("%.2f\n", (lsum - lsum_old)/interval_float/1000000); + + tv_old = tv_new; + lsum_old = lsum; + } + /* summary */ + for (j = 0, lsum = 0; j < num_threads; ++j) { + printf("%d %.2f\n", j, thread_data[j].counter/1000000.0); + lsum += thread_data[j].counter; + } + printf("Total %.2f\n", lsum/1000000.0); + +} + + +void print_banner() +{ + puts(BANNER); +} + +int main(int argc, char *argv[]) +{ + parse_args(argc, argv); + + + print_banner(); + + bind_to_cpus(); + + create_threads(); + + monitor_threads(); /* never returns */ + + return 0; +} |